From a3e1a15f8e7da1c9474b394b7a3cbea382be7372 Mon Sep 17 00:00:00 2001 From: Jana Hozzova Date: Wed, 7 Aug 2024 11:16:29 +0200 Subject: [PATCH 001/234] Update the experiments setup json schema --- src/autotuning_methodology/schema.json | 373 +++++++++++++++++++------ 1 file changed, 280 insertions(+), 93 deletions(-) diff --git a/src/autotuning_methodology/schema.json b/src/autotuning_methodology/schema.json index c985a95..c0c8deb 100755 --- a/src/autotuning_methodology/schema.json +++ b/src/autotuning_methodology/schema.json @@ -1,88 +1,295 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://example.com/schemas/experiments/v0.1.1.schema.json", - "version": "0.1.2", + "version": "1.0.0", "title": "Experiment", - "description": "An experiment configuration file", + "description": "An experiment setup configuration file", "type": "object", + "required": [ + "name", + "parent_folder", + "experimental_groups_defaults", + "search_strategies", + "statistics_settings", + "visualization_settings" + ], "properties": { "version": { - "description": "Version number of the experiment file standard", + "description": "Version number of the experiment setup configuration file standard", "type": "string" }, "name": { "description": "Name of the experiment", "type": "string" }, - "folder_id": { - "description": "Unique ID of the folder to store the results of this experiment in", - "type": "string" - }, - "kernels_path": { - "description": "Path to the directory that has the tuning scripts specified in `kernels`, relative to the experiments file.", - "type": "string" - }, - "bruteforced_caches_path": { - "description": "Path to the directory that has the bruteforced caches, relative to the experiments file.", - "type": "string" + "parent folder": { + "description": "Absolute or relative path of the folder to store all related files for this experiment. 
This folder needs to already exist.", + "type": "string", + "default": "./" }, - "visualization_caches_path": { - "description": "Path to the directory to write / look for visualization caches, relative to the experiments file.", - "type": "string" + "experimental_groups_defaults": { + "description": "Default settings for experimental groups", + "type": "object", + "required": [ + "applications", + "gpus" + ], + "properties": { + "autotuner": { + "description": "Autotuner that will be used to tune the experimental group. Has to be specified either in experimental_groups_defaults or in experimental group.", + "enum": [ + "KernelTuner", + "KTT" + ] + }, + "autotuner_path": { + "description": "Path to the library of the autotuner", + "type": "string" + }, + "applications": { + "description": "List of applications for which measurements were taken and written to full search space files. Can be used in pattern_for_full_search_space_filename.", + "type": "array", + "items": { + "type": "object", + "required": [ + "name", + "input_file" + ], + "properties": { + "name": { + "type": "string" + }, + "input_file": { + "type": "string" + } + } + } + }, + "gpus": { + "description": "List of GPUs where measurements were taken and written to full search space files. 
Can be used in pattern_for_full_search_space_filename.", + "type": "array", + "items": { + "type": "string" + } + }, + "pattern_for_full_search_space_filenames": { + "description": "Pattern for filenames of full search space files", + "type": "object", + "required": [ + "regex", + "regex_variables" + ], + "properties": { + "regex": { + "type": "string", + "pattern": "(.*\\${gpus}.*\\${applications}.*\\.json)|(.*\\${kernel}.*\\${gpu}.*\\.json)", + "examples": [ + "${gpus}_${applications}_output.json", + "full-search-space-${applications}-${gpus}.json" + ] + }, + "regex_variables": { + "type": "array", + "items": { + "type": "string" + }, + "default": ["applications", "gpus"] + } + } + }, + "stochastic": { + "description": "Whether the repeated runs of the same experimental group (combination of application, GPU and search strategy) exhibit stochastic behaviour, e.g. due to stochastic search strategy", + "type": "boolean", + "default": true + }, + "repeats": { + "description": "How many times to repeat the run for a single experimental group (combination of application, GPU and search strategy)", + "type": "integer", + "minimum": 1, + "default": 100 + }, + "samples": { + "description": "How many samples of measurements for a single configuration are present in full search space file", + "type": "integer", + "minimum": 1, + "default": 32 + }, + "minimum_number_of_valid_search_iterations": { + "description": "How many non-error, valid configurations account for a single run of search algorithm", + "type": "integer", + "minimum": 1, + "default": 20 + }, + "ignore_cache": { + "description": "If true, always re-run the experiments, even though results from previously executed experiments are stored in run folder.", + "type": "boolean", + "default": false + } + } }, - "kernels": { - "description": "Kernels to optimize", + "search_strategies": { + "description": "Settings for search strategies", "type": "array", - "items": { - "type": "string" - }, "minItems": 1, - 
"uniqueItems": true - }, - "GPUs": { - "description": "GPUs to optimize on", - "type": "array", + "uniqueItems": true, "items": { - "type": "string" - }, - "minItems": 1, - "uniqueItems": true - }, - "minimization": { - "description": "Direction of optimization (minimize or maximize)", - "type": "boolean", - "default": true - }, - "resolution": { - "description": "The resolution of the time range", - "type": "integer", - "minimum": 2 - }, - "cutoff_percentile": { - "description": "Fraction of difference between median and absolute optimum at which to stop the time range", - "type": "number", - "exclusiveMinimum": 0, - "maximum": 1 - }, - "cutoff_percentile_start": { - "description": "Fraction of difference between median and absolute optimum at which to start the time range", - "type": "number", - "minimum": 0, - "exclusiveMaximum": 1 + "type": "object", + "required": [ + "name", + "search_method", + "display_name" + ], + "properties": { + "name": { + "description": "Name of the search strategy", + "type": "string" + }, + "autotuner": { + "description": "Autotuner that will be used for tuning. 
Has to be specified either in experimental_groups_defaults or in search_strategies.", + "enum": [ + "KernelTuner", + "KTT" + ] + }, + "autotuner_path": { + "description": "Path to the library of the autotuner", + "type": "string" + }, + "search_method": { + "description": "Name of the search method as recognized by the autotuner", + "type": "string" + }, + "search_method_hyperparameters": { + "description": "A list of hyperparameters for the search method as recognized by the autotuner", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "value": { + "type": "string" + } + } + } + }, + "display_name": { + "description": "Name for the search strategy used in visualizations", + "type": "string" + }, + "stochastic": { + "description": "Whether the repeated runs of the same experimental group (combination of application, GPU and search strategy) exhibit stochastic behaviour, e.g. due to stochastic search strategy", + "type": "boolean", + "default": true + }, + "repeats": { + "description": "How many times to repeat the run for a single experimental group (combination of application, GPU and search strategy)", + "type": "integer", + "minimum": 1, + "default": 100 + }, + "samples": { + "description": "How many samples of measurements for a single configuration are present in full search space file", + "type": "integer", + "minimum": 1, + "default": 32 + }, + "minimum_number_of_valid_search_iterations": { + "description": "How many non-error, valid configurations account for a single run of search strategy", + "type": "integer", + "minimum": 1, + "default": 20 + }, + "ignore_cache": { + "description": "If true, always re-run the experiments, even though results from previously executed experiments are stored in run folder.", + "type": "boolean", + "default": false + } + } + } }, - "cutoff_type": { - "description": "Whether to base the cutoff on function evaluations or time", - "type": "string", - "enum": [ - 
"fevals", - "time" - ] + "statistics_settings": { + "description": "Settings for the statistics calculation", + "type": "object", + "required": [ + "minimization", + "cutoff_percentile", + "cutoff_percentile_start", + "cutoff_type", + "objective_time_keys", + "objective_performance_keys" + ], + "properties": { + "minimization": { + "description": "Whether the optimization aims to minimize or maximize", + "type": "boolean", + "default": true + }, + "cutoff_percentile": { + "description": "Fraction of difference between median and absolute optimum at which to stop the time range", + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 + }, + "cutoff_percentile_start": { + "description": "Fraction of difference between median and absolute optimum at which to start the time range", + "type": "number", + "minimum": 0, + "exclusiveMaximum": 1 + }, + "cutoff_type": { + "description": "Whether to base the cutoff on function evaluations or time", + "type": "string", + "enum": [ + "fevals", + "time" + ] + }, + "objective_time_keys": { + "description": "Time key(s) to use as the time objective. In case of multiple keys, the values are summed.", + "type": "array", + "items": { + "enum": [ + "compilation_time", + "runtimes", + "framework", + "search_algorithm", + "validation", + "all" + ] + }, + "uniqueItems": true, + "minItems": 1 + }, + "objective_performance_keys": { + "description": "The performance key(s) to use as the performance objective. 
In case of multiple keys, the values are summed.", + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true, + "minItems": 1 + } + } }, - "plot": { + "visualization_settings": { + "description": "Settings for the visualizations", "type": "object", + "required": [ + "resolution", + "x_axis_value_types", + "y_axis_value_types", + "confidence_level" + ], "properties": { - "plot_x_value_types": { - "description": "Types of value on the x-axis", + "resolution": { + "description": "The resolution of the time range", + "type": "integer", + "minimum": 2 + }, + "x_axis_value_types": { + "description": "Types of value on the x-axis. Multiple values produces multiple (sub) plots.", "type": "array", "items": { "type": "string", @@ -95,8 +302,8 @@ "minItems": 1, "uniqueItems": true }, - "plot_y_value_types": { - "description": "Types of value on the y-axis (absolute values, median-absolute normalized, improvement over baseline)", + "y_axis_value_types": { + "description": "Types of value on the y-axis. Multiple values produces multiple (sub) plots.", "type": "array", "items": { "type": "string", @@ -111,43 +318,23 @@ "uniqueItems": true }, "confidence_level": { - "type": [ - "number", - "null" - ], + "description": "The confidence level used for the confidence / prediction interval, visualized as an error shade", + "type": "number", + "default": 0.95, "exclusiveMinimum": 0, "maximum": 1 }, "compare_baselines": { + "description": "[preview feature] Compare baselines to each other. 
Requires editing the baselines list in the `plot_baselines_comparison` function.", "type": "boolean", "default": false }, "compare_split_times": { + "description": "[preview feature] Plot a comparison of split times for strategies and baselines", "type": "boolean", "default": false } - }, - "required": [ - "plot_x_value_types", - "plot_y_value_types", - "confidence_level" - ] + } } - }, - "required": [ - "version", - "name", - "folder_id", - "kernels_path", - "bruteforced_caches_path", - "visualization_caches_path", - "kernels", - "GPUs", - "minimization", - "resolution", - "cutoff_percentile", - "cutoff_percentile_start", - "cutoff_type", - "plot" - ] -} \ No newline at end of file + } +} From 594a87b1e6bbef740354eceb6af29d80ad4236f4 Mon Sep 17 00:00:00 2001 From: Jana Hozzova Date: Wed, 7 Aug 2024 12:47:11 +0200 Subject: [PATCH 002/234] Update experiments.py to work with a new schema --- src/autotuning_methodology/experiments.py | 503 +++++++++++++++++----- 1 file changed, 388 insertions(+), 115 deletions(-) diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 382347b..e60ca47 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -3,18 +3,18 @@ from __future__ import annotations # for correct nested type hints e.g. 
list[str], tuple[dict, str] import json -import sys from argparse import ArgumentParser -from importlib import import_module from importlib.resources import files from math import ceil from os import getcwd +from os import makedirs from pathlib import Path from jsonschema import validate from autotuning_methodology.caching import ResultsDescription from autotuning_methodology.runner import collect_results +from autotuning_methodology.runner import convert_KTT_output_to_standard from autotuning_methodology.searchspace_statistics import SearchspaceStatistics @@ -30,13 +30,16 @@ def get_args_from_cli(args=None) -> str: Returns: The filepath to the experiments file. """ - CLI = ArgumentParser() - CLI.add_argument("experiment", type=str, help="The experiment.json to execute, see experiments/template.json") - args = CLI.parse_args(args) + cli = ArgumentParser() + cli.add_argument( + "experiment", type=str, + help="The experiment setup json file to execute, see experiments/template.json" + ) + args = cli.parse_args(args) filepath: str = args.experiment if filepath is None or filepath == "": raise ValueError( - "Invalid '-experiment' option. Run 'visualize_experiments.py -h' to read more about the options." + "Invalid '--experiment' option. Run 'visualize_experiments.py -h' to read more." ) return filepath @@ -51,6 +54,24 @@ def get_experiment_schema_filepath(): assert schemafile.is_file(), f"Path to schema.json does not exist, attempted path: {schemafile}" return schemafile +def make_and_check_path(filename: str, parent = None, extension = None) -> Path: + filename_path = Path(filename) + if filename_path.is_absolute() is False and parent is not None: + filename_path = Path(parent).joinpath(filename).resolve() + if filename_path.exists(): + return filename_path + # try and add extension + if extension is None: + raise FileNotFoundError( + f"{filename_path} does not exist." 
+ ) + filename_path = Path(str(filename_path) + extension) + if filename_path.exists(): + return filename_path + raise FileNotFoundError( + f"{filename_path} does not exist." + ) + def get_experiment(filename: str) -> dict: """Validates and gets the experiment from the experiments .json file. @@ -84,38 +105,286 @@ def get_experiment(filename: str) -> dict: return experiment -def get_strategies(experiment: dict) -> dict: - """Gets the strategies from an experiments file by augmenting it with the defaults. +def get_experimental_groups(experiment: dict) -> list[dict]: + """Prepares all the experimental groups as all combinations of application and gpus (from experimental_groups_defaults) and big experimental groups from setup file (experimental_groups, usually search methods). Check additional settings for each experimental group. Prepares the directory structure for the whole experiment. Args: experiment: the experiment dictionary object. Returns: - The strategies in the experiment dictionary object, augmented where necessery. + The experimental groups in the experiment dictionary object. 
+ """ + experimental_groups_defaults = experiment["experimental_groups_defaults"] + search_strategies = experiment["search_strategies"] + + # set up the directory structure + experiment["parent_folder_absolute_path"] = Path(experiment["parent_folder"]).resolve() + # if folder "run" does not exist, create + makedirs(experiment["parent_folder_absolute_path"].joinpath("run"), exist_ok = True) + makedirs(experiment["parent_folder_absolute_path"].joinpath("setup"), exist_ok = True) + + # create folders for each experimental group from file + for strategy in search_strategies: + makedirs(experiment["parent_folder_absolute_path"].joinpath("run").joinpath(strategy["name"]), exist_ok = True) + + # generate all experimental groups + # with applications and gpus provided in experimental_groups_defaults + # and search strategies provided in search_strategies + all_experimental_groups = generate_all_experimental_groups( + search_strategies, + experimental_groups_defaults, + experiment["parent_folder_absolute_path"] + ) + + # additional check beyond validation + # if every experimental group has autotuner set + # set autotuner_path to default installation if not set by the user + for group in all_experimental_groups: + if group.get("autotuner") is None: + raise KeyError( + "Property 'autotuner' must be set for all groups, either in experimental_groups_defaults or in experimental_groups. It is not set for", + group["full_name"] + ) + if group["autotuner"] == "KTT": + if group["samples"] != 1: + raise NotImplementedError( + f"KTT currently supports only one sample per run and output. Please set samples=1 for group['full_name']." + ) + if group.get("autotuner_path") is None: + raise NotImplementedError( + "Default autotuner_path is not supported yet for KTT, please set autotuner_path for ", group["full_name"] + " to directory with KttTuningLauncher and pyktt.so, e.g. /home/user/KTT/Build/x86_64_Release." 
+ ) + elif Path(group["autotuner_path"]).exists() is False: + raise FileNotFoundError( + f"Directory {group['autotuner_path']} does not exists. Try setting the absolute path." + ) + elif Path(group["autotuner_path"]).joinpath("KttTuningLauncher").exists() is False: + raise FileNotFoundError( + f"Directory {group['autotuner_path']} does not contain KttTuningLauncher. Have you used --tuning-loader when premaking KTT?" + ) + elif Path(group["autotuner_path"]).joinpath("pyktt.so").exists() is False: + raise FileNotFoundError( + f"Directory {group['autotuner_path']} does not contain pyktt.so. Have you used --python when premaking KTT?" + ) + # TODO make and set default autotuner path + + return all_experimental_groups + +def generate_all_experimental_groups( + search_strategies: list[dict], + experimental_groups_defaults: dict, + parent_folder_path: Path +) -> list[dict]: + """Generates all experimental groups for the experiment as a combination of given applications, gpus and search strategies from experiments setup file. + + Args: + search_strategies: list of dictionaries with settings for various search strategies from experiments setup file, section search_strategies. + experimental_groups_defaults: a dictionary with default settings for experimental groups from experiments setup file, section experimental_groups_defaults. + parent_folder_path: path to experiment parent folder that stores all files generated in the experiment. + + Returns: + A list of dictionaries, one for each experimental group. 
+ """ + experimental_groups = [] + + for gpu in experimental_groups_defaults["gpus"]: + for application in experimental_groups_defaults["applications"]: + for strategy in search_strategies: + group = strategy + + for default in experimental_groups_defaults: + if default not in group and default not in [ + "applications", "gpus", "pattern_for_full_search_space_filenames" + ]: + group[default] = experimental_groups_defaults[default] + + group["full_name"] = "_".join([gpu, application["name"], group["name"]]) + + group["gpu"] = gpu + group["application_name"] = application["name"] + + group["application_folder"] = Path(application["folder"]) + group["application_input_file"] = make_and_check_path(application["input_file"], application["folder"], None) + group["input_file"] : Path + group["input_file"] = parent_folder_path.joinpath("setup").joinpath( + "_".join([group["full_name"], "input.json"])) + + if experimental_groups_defaults.get("pattern_for_full_search_space_filename") is None: + group["full_search_space_file"] = get_full_search_space_filename_from_input_file( + group["application_input_file"] + ) + else: + group["full_search_space_file"] = get_full_search_space_filename_from_pattern( + experimental_groups_defaults["pattern_for_full_search_space_filenames"], + gpu, + application["name"] + ) + + if group["autotuner"] == "KTT": + # convert full search space file from KTT output format to standard format + # note that full search space file in KTT output format still gets injected to input json, that is because KTT needs to have that file in its own format + # the converted file is loaded with this package when calculating search space statistics + group["converted_full_search_space_file"] = convert_KTT_to_standard_full_search_space_file( + group["full_search_space_file"], + parent_folder_path.joinpath("setup") + ) + + group["output_file"] : Path + group["output_file"] = parent_folder_path.joinpath("run").joinpath( + group["name"]).joinpath(group["full_name"] + 
".json").resolve() + + + generate_input_file(group) + experimental_groups.append(group) + + return experimental_groups + + +def get_full_search_space_filename_from_input_file(input_filename: Path) -> Path: + """Returns a path to full search space file that is provided in the input json file in KernelSpecification.SimulationInput. + + Args: + input_filename: path to input json file. + + Raises: + KeyError: if the path is not provided, but is expected. + + Returns: + A path to full search space file that was written in the input json file. """ - strategy_defaults = experiment["strategy_defaults"] - strategies = experiment["strategies"] - # # get a baseline index if it exists - # baseline_index = list( - # strategy_index for strategy_index, strategy in enumerate(strategies) if "is_baseline" in strategy - # ) - # if len(baseline_index) != 1: - # raise ValueError(f"There must be exactly one baseline, found {len(baseline_index)} baselines") - # if strategies[baseline_index[0]]["is_baseline"] is not True: - # raise ValueError(f"is_baseline must be true, yet is set to {strategies[0]['is_baseline']}!") - # # if the baseline index is not 0, put the baseline strategy first - # if baseline_index[0] != 0: - # raise ValueError("The baseline strategy must be the first strategy in the experiments file!") - # # strategies.insert(0, strategies.pop(baseline_index[0])) - - # augment the strategies with the defaults - for strategy in strategies: - for default in strategy_defaults: - if default not in strategy: - strategy[default] = strategy_defaults[default] - return strategies + with open(input_filename, 'r', encoding="utf-8") as input_file: + input_json = json.load(input_file) + if input_json["KernelSpecification"].get("SimulationInput") is None: + raise KeyError( + "SimulationInput, i.e. full search space file is expected and not defined in", input_filename, ". 
Please set the path to that file in KernelSpecification.SimulationInput in input json file or set pattern_for_full_search_space_filename in experiments setup json file.") + full_search_space_filename = make_and_check_path(input_json["KernelSpecification"]["SimulationInput"], str(input_filename.parent), ".json") + # need to return filename WITHOUT .json, KTT (and probably also others) needs that in SimulationInput in input json as other autotuner can take other formats + return full_search_space_filename.parent.joinpath(full_search_space_filename.stem) + +def get_full_search_space_filename_from_pattern( + pattern: dict, gpu: str, application_name: str +) -> Path: + """Returns a path to full search space file that is generated from the pattern provided in experiments setup file. + + Args: + pattern: pattern regex string + gpu: name of the gpu, needs to be plugged into the pattern + application_name: name of the application, needs to be plugged into the pattern + + Raises: + NotImplementedError: if the regex expects other variables than just application name and gpu. + + Returns: + A path to full search file generated from the pattern. + """ + if pattern["regex_variables"] != ["applications", "gpus"]: + raise NotImplementedError("Other variables than applications and gpus in pattern for full search space filename are not supported yet. Sorry.") + filename = pattern["regex"].replace("${applications}", application_name).replace("${gpus}", gpu) + full_search_space_filename = make_and_check_path(filename) + return full_search_space_filename + +def convert_KTT_to_standard_full_search_space_file( + full_search_space_file: Path, + setup_folder: Path) -> Path: + """ Converts KTT-formatted full search space file to the standard format recognized by this package. 
+ + Args: + full_search_space_file: the path to KTT-formatted full search space file + setup_folder: path to setup directory for this experiment + + Returns: + A path to newly created full search space file in standard format, in the setup directory of the experiment + """ + converted_output = convert_KTT_output_to_standard(full_search_space_file.with_suffix(".json")) + converted_filename = setup_folder.joinpath(full_search_space_file.stem + "_converted.json") + + with open(converted_filename, "w", encoding = "utf-8") as converted_file: + json.dump(converted_output, converted_file, indent=4) + + return converted_filename +def calculate_budget( + group: dict, statistics_settings: dict, searchspace_stats: SearchspaceStatistics +) -> dict: + """Calculates the budget for the experimental group, given cutoff point provided in experiments setup file. + + Args: + group: a dictionary with settings for experimental group + statistics_settings: a dictionary with settings related to statistics + searchspace_stats: a SearchspaceStatistics instance with cutoff points determined from related full search space files + + Returns: + A modified group dictionary. 
+ """ + group["budget"] = {} + # set cutoff point + _, cutoff_point_fevals, cutoff_point_time = searchspace_stats.cutoff_point_fevals_time( + statistics_settings["cutoff_percentile"] + ) + + # +10% margin, to make sure cutoff_point is reached by compensating for potential non-valid evaluations # noqa: E501 + cutoff_margin = group.get("cutoff_margin", 1.1) + + # set when to stop + if statistics_settings["cutoff_type"] == "time": + group["budget"]["time_limit"] = cutoff_point_time * cutoff_margin + else: + group["budget"]["max_fevals"] = min( + int(ceil(cutoff_point_fevals * cutoff_margin)), searchspace_stats.size + ) + + # write to group's input file as Budget + with open(group["input_file"], "r", encoding="utf-8") as fp: + input_json = json.load(fp) + if input_json.get("Budget") is None: + input_json["Budget"] = [] + input_json["Budget"].append({}) + if group["budget"].get("time_limit") is not None: + input_json["Budget"][0]["Type"] = "TuningDuration" + input_json["Budget"][0]["BudgetValue"] = group["budget"]["time_limit"] + else: #it's max_fevals + input_json["Budget"][0]["Type"] = "ConfigurationCount" + input_json["Budget"][0]["BudgetValue"] = group["budget"]["max_fevals"] + + with open(group["input_file"], "w", encoding="utf-8") as fp: + json.dump(input_json, fp, indent=4) + + return group + +def generate_input_file(group: dict): + """Creates a input json file specific for a given application, gpu and search method. + + Args: + group: dictionary with settings for a given experimental group. + """ + with open(group["application_input_file"], "r", encoding="utf-8") as fp: + input_json = json.load(fp) + input_json["KernelSpecification"]["SimulationInput"] = str(group["full_search_space_file"]) + input_json["General"]["OutputFile"] = str( + group["output_file"].parent.joinpath(group["output_file"].stem) + ) + if input_json["General"]["OutputFormat"] != "JSON": + raise RuntimeError(f"Only JSON output format is supported. 
Please set General.OutputFormat to JSON in {group['application_input_file']}.") + if input_json["KernelSpecification"].get("Device") is None: + input_json["KernelSpecification"]["Device"] = {} + input_json["KernelSpecification"]["Device"]["Name"] = group["gpu"] + else: + input_json["KernelSpecification"]["Device"]["Name"] = group["gpu"] + + input_json["Search"] = {} + input_json["Search"]["Name"] = group["search_method"] + if group.get("search_method_hyperparameters") is not None: + input_json["Search"]["Attributes"] = [] + for param in group["search_method_hyperparameters"]: + attribute = {} + attribute["Name"] = param["name"] + attribute["Value"] = param["value"] + input_json["Search"]["Attributes"].append(attribute) + # note that this is written to a different file, specific for gpu, application and search method + with open(group["input_file"], "w", encoding="utf-8") as fp: + json.dump(input_json, fp, indent=4) + def execute_experiment(filepath: str, profiling: bool = False) -> tuple[dict, dict, dict]: """Executes the experiment by retrieving it from the cache or running it. @@ -127,100 +396,104 @@ def execute_experiment(filepath: str, profiling: bool = False) -> tuple[dict, di FileNotFoundError: if the path to the kernel specified in the experiments file is not found. Returns: - A tuple of the experiment dictionary, the strategies executed, and the resulting list of ``ResultsDescription``. + A tuple of the experiment dictionary, the experimental groups executed, and the resulting list of ``ResultsDescription``. 
""" experiment = get_experiment(filepath) - experiment_folderpath = Path(filepath).parent + experiment_folderpath = Path(experiment["parent_folder"]) print(f"Starting experiment '{experiment['name']}'") - experiment_folder_id: str = experiment["folder_id"] - minimization: bool = experiment.get("minimization", True) - cutoff_percentile: float = experiment.get("cutoff_percentile", 1) - cutoff_type: str = experiment.get("cutoff_type", "fevals") - assert cutoff_type == "fevals" or cutoff_type == "time", f"cutoff_type must be 'fevals' or 'time', is {cutoff_type}" - curve_segment_factor: float = experiment.get("curve_segment_factor", 0.05) - assert isinstance(curve_segment_factor, float), f"curve_segment_factor is not float, {type(curve_segment_factor)}" - strategies: list[dict] = get_strategies(experiment) - - # add the kernel directory to the path to import the module, relative to the experiment file - kernels_path = experiment_folderpath / Path(experiment["kernels_path"]) - if not kernels_path.exists(): - raise FileNotFoundError(f"No such path {kernels_path.resolve()}, CWD: {getcwd()}") - sys.path.append(str(kernels_path)) - kernel_names = experiment["kernels"] - kernels = list(import_module(kernel_name) for kernel_name in kernel_names) - - # variables for comparison - objective_time_keys: list[str] = experiment["objective_time_keys"] - objective_performance_keys: list[str] = experiment["objective_performance_keys"] - - # execute each strategy in the experiment per GPU and kernel - results_descriptions: dict[str, dict[str, dict[str, ResultsDescription]]] = dict() - gpu_name: str - for gpu_name in experiment["GPUs"]: - print(f" | running on GPU '{gpu_name}'") - results_descriptions[gpu_name] = dict() - for index, kernel in enumerate(kernels): - kernel_name = kernel_names[index] - searchspace_stats = SearchspaceStatistics( - kernel_name=kernel_name, - device_name=gpu_name, - minimization=minimization, - objective_time_keys=objective_time_keys, - 
objective_performance_keys=objective_performance_keys, - bruteforced_caches_path=experiment_folderpath / experiment["bruteforced_caches_path"], - ) - # set cutoff point - _, cutoff_point_fevals, cutoff_point_time = searchspace_stats.cutoff_point_fevals_time(cutoff_percentile) - - print(f" | - optimizing kernel '{kernel_name}'") - results_descriptions[gpu_name][kernel_name] = dict() - for strategy in strategies: - strategy_name: str = strategy["name"] - strategy_display_name: str = strategy["display_name"] - stochastic = strategy["stochastic"] - cutoff_margin = strategy.get( - "cutoff_margin", 1.1 - ) # +10% margin, to make sure cutoff_point is reached by compensating for potential non-valid evaluations # noqa: E501 - print(f" | - | using strategy '{strategy['display_name']}'") - - # setup the results description - if "options" not in strategy: - strategy["options"] = dict() - - # set when to stop - if cutoff_type == "time": - strategy["options"]["time_limit"] = cutoff_point_time * cutoff_margin - else: - strategy["options"]["max_fevals"] = min( - int(ceil(cutoff_point_fevals * cutoff_margin)), searchspace_stats.size - ) - results_description = ResultsDescription( - experiment_folder_id, - kernel_name, - gpu_name, - strategy_name, - strategy_display_name, - stochastic, - objective_time_keys=objective_time_keys, - objective_performance_keys=objective_performance_keys, - minimization=minimization, - visualization_caches_path=experiment_folderpath / experiment["visualization_caches_path"], + all_experimental_groups = get_experimental_groups(experiment) + + # prepare objective_time_keys, in case it was defined as all, explicitly list all keys + objective_time_keys: list[str] = experiment["statistics_settings"]["objective_time_keys"] + if "all" in objective_time_keys: + objective_time_keys = [] + # get the path to the schema + schemafile = get_experiment_schema_filepath() + # open the experiment file and validate using the schema file + with open(schemafile, "r", 
encoding="utf-8") as schemafile: + schema = json.load(schemafile) + objective_time_keys = schema["properties"]["statistics_settings"]["properties"]["objective_time_keys"]["items"]["enum"] + objective_time_keys.remove("all") + experiment["statistics_settings"]["objective_time_keys"] = objective_time_keys + + experiment["experimental_groups_defaults"]["applications_names"] = [] + for application in experiment["experimental_groups_defaults"]["applications"]: + experiment["experimental_groups_defaults"]["applications_names"].append(application["name"]) + + # initialize the matrix of results_descriptions based on provided gpus and applications + # initialize searchspace statistics, one for each full search file + results_descriptions: dict[str, dict[str, dict[str, ResultsDescription]]] = {} + searchspace_statistics: dict[str, dict[str, SearchspaceStatistics]] = {} + + for gpu in experiment["experimental_groups_defaults"]["gpus"]: + results_descriptions[gpu] = {} + searchspace_statistics[gpu] = {} + for application in experiment["experimental_groups_defaults"]["applications_names"]: + results_descriptions[gpu][application] = {} + + + +# just iterate over experimental_groups, collect results and write to proper place + for group in all_experimental_groups: + + print(f" | - running on GPU '{group['gpu']}'") + print(f" | - | tuning application '{group['application_name']}'") + print(f" | - | - | with settings of experimental group '{group['display_name']}'") + + # create SearchspaceStatistics for full search space file associated with this group, if it does not exist + if searchspace_statistics.get(group["gpu"]).get(group["application_name"]) is None: + full_search_space_file_path = None + if group.get("converted_full_search_space_file") is None: + full_search_space_file_path = group["full_search_space_file"] + else: + full_search_space_file_path = group["converted_full_search_space_file"] + + searchspace_statistics[group["gpu"]][group["application_name"]] = 
SearchspaceStatistics( + application_name = group["application_name"], + device_name = group["gpu"], + minimization = experiment["statistics_settings"]["minimization"], + objective_time_keys = objective_time_keys, + objective_performance_keys = experiment["statistics_settings"]["objective_performance_keys"], + full_search_space_file_path = full_search_space_file_path, ) - # if the strategy is in the cache, use cached data - if "ignore_cache" not in strategy and results_description.has_results(): - print(" | - |-> retrieved from cache") - else: # execute each strategy that is not in the cache - results_description = collect_results( - kernel, strategy, results_description, searchspace_stats, profiling=profiling - ) + # calculation of budget can be done only now, after searchspace statistics have been initialized + group = calculate_budget( + group, + experiment["statistics_settings"], + searchspace_statistics[group["gpu"]][group["application_name"]] + ) - # set the results - results_descriptions[gpu_name][kernel_name][strategy_name] = results_description + results_description = ResultsDescription( + run_folder = experiment_folderpath/ "run" / group["name"], + application_name = group["application_name"], + device_name = group["gpu"], + group_name = group["name"], + group_display_name = group["display_name"], + stochastic = group["stochastic"], + objective_time_keys = objective_time_keys, + objective_performance_keys = experiment["statistics_settings"]["objective_performance_keys"], + minimization = experiment["statistics_settings"]["minimization"], + ) + + + # if the strategy is in the cache, use cached data + if ("ignore_cache" not in group or group["ignore_cache"] is False) and results_description.has_results(): + print(" | - | - | -> retrieved from cache") + else: # execute each strategy that is not in the cache + results_description = collect_results( + group["input_file"], + group, + results_description, + 
searchspace_statistics[group["gpu"]][group["application_name"]], + profiling=profiling + ) - return experiment, strategies, results_descriptions + # set the results + results_descriptions[group["gpu"]][group["application_name"]][group["name"]] = results_description + return experiment, all_experimental_groups, searchspace_statistics, results_descriptions def entry_point(): # pragma: no cover """Entry point function for Experiments.""" From f4fdac7197a3b2a060ffc1dc46ff9c16cc3708ea Mon Sep 17 00:00:00 2001 From: Jana Hozzova Date: Wed, 7 Aug 2024 12:51:25 +0200 Subject: [PATCH 003/234] Update caching.py to work with a new schema --- src/autotuning_methodology/caching.py | 39 +++++++++++++-------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/src/autotuning_methodology/caching.py b/src/autotuning_methodology/caching.py index 2daa221..622af19 100755 --- a/src/autotuning_methodology/caching.py +++ b/src/autotuning_methodology/caching.py @@ -30,44 +30,41 @@ class ResultsDescription: def __init__( self, - folder_id: str, - kernel_name: str, + run_folder: Path, + application_name: str, device_name: str, - strategy_name: str, - strategy_display_name: str, + group_name: str, + group_display_name: str, stochastic: bool, objective_time_keys: list[str], objective_performance_keys: list[str], minimization: bool, - visualization_caches_path: Path, ) -> None: """Initialization method for the ResultsDescription object. Args: - folder_id: the unique ID of the folder to store in. - kernel_name: the name of the kernel used. + run_folder: a folder to store all files generated during experiments + application_name: the name of the application. device_name: the name of the device used. - strategy_name: the name of the optimization algorithm used, must not contain spaces or special characters. - strategy_display_name: the name of the optimization algorithm used in printing / visualization. - stochastic: whether the optimization algorithm is stochastic. 
+ group_name: the name of the experimental group, usually search method used, must not contain spaces or special characters. + group_display_name: the name of the experimental group used in printing / visualization. + stochastic: whether the search method is stochastic. objective_time_keys: the objective time keys used. objective_performance_keys: the objective performance keys used. - minimization: whether the optimization algorithm performed minimization (attempted to find the minimum). - visualization_caches_path: path to visualization caches relative to the experiments file, creation allowed. + minimization: whether the search method performed minimization (attempted to find the minimum). """ # all attributes must be hashable for symetric difference checking self._version = "1.3.0" self.__stored = False - self.__folder_id = folder_id - self.kernel_name = kernel_name + self.application_name = application_name self.device_name = device_name - self.strategy_name = strategy_name - self.strategy_display_name = strategy_display_name + self.group_name = group_name + self.group_display_name = group_display_name self.stochastic = stochastic self.objective_time_keys = objective_time_keys self.objective_performance_keys = objective_performance_keys self.minimization = minimization - self.visualization_caches_path = visualization_caches_path + self.run_folder = run_folder self.numpy_arrays_keys = [ "fevals_results", "objective_time_results", @@ -85,7 +82,7 @@ def __get_as_dict(self) -> dict: a dictionary, similar to self.__dict__ but with some keys removed. 
""" dictionary = vars(self) - not_saved_keys = ["strategy_display_name", "visualization_caches_path"] + not_saved_keys = ["group_display_name", "visualization_caches_path"] for not_saved_key in not_saved_keys: if not_saved_key in dictionary.keys(): del dictionary[not_saved_key] @@ -124,7 +121,7 @@ def is_same_as(self, other: ResultsDescription) -> bool: # check if same value for each key for attribute_key, attribute_value in self.__get_as_dict().items(): - if attribute_key == "strategy_display_name" or attribute_key == "visualization_caches_path": + if attribute_key == "group_display_name" or attribute_key == "visualization_caches_path": continue else: assert ( @@ -134,11 +131,11 @@ def is_same_as(self, other: ResultsDescription) -> bool: return True def __get_cache_filename(self) -> str: - return f"{self.device_name}_{self.strategy_name}.npz" + return f"{self.device_name}_{self.application_name}.npz" def __get_cache_filepath(self) -> Path: """Get the filepath to this experiment.""" - return self.visualization_caches_path / self.__folder_id / self.kernel_name + return self.run_folder def __get_cache_full_filepath(self) -> Path: """Get the filepath for this file, including the filename and extension.""" From f34a7f06f03495918e4c3d852cb321ae30134392 Mon Sep 17 00:00:00 2001 From: Jana Hozzova Date: Wed, 7 Aug 2024 13:12:42 +0200 Subject: [PATCH 004/234] Add KTT tuning to runner.py --- src/autotuning_methodology/runner.py | 363 ++++++++++++++------------- 1 file changed, 190 insertions(+), 173 deletions(-) diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index ec9cb45..c75e2ec 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -9,6 +9,7 @@ import warnings from inspect import getfile from pathlib import Path +import subprocess import numpy as np import progressbar @@ -22,9 +23,8 @@ is_valid_config_result, ) +#TODO this does not conform to new intedned dicrectory structure folder = 
Path(__file__).parent.parent.parent -import_runs_path = Path(folder, "cached_data_used/import_runs") - # Imported runs must be remapped to have the same keys, values and order of parameters as the other runs. # This mapping provides both the order and mapping, so all keys must be present. @@ -55,19 +55,19 @@ @contextlib.contextmanager -def temporary_working_directory_change(new_WD: Path): +def temporary_working_directory_change(new_wd: Path): """Temporarily change to the given working directory in a context. Based on https://stackoverflow.com/questions/75048986/way-to-temporarily-change-the-directory-in-python-to-execute-code-without-affect. Args: - new_WD: path of the working directory to temporarily change to. + new_wd: path of the working directory to temporarily change to. """ - assert new_WD.exists() + assert new_wd.exists() # save the current working directory so we can revert to it original_working_directory = os.getcwd() # potentially raises an exception, left to the caller - os.chdir(new_WD) + os.chdir(new_wd) # yield control to the caller try: @@ -81,12 +81,78 @@ def temporary_working_directory_change(new_WD: Path): def load_json(path: Path): """Helper function to load a JSON file.""" - assert path.exists(), f"File {path.name} does not exist relative to {os.getcwd()}" + assert path.exists(), f"File {str(path)} does not exist relative to {os.getcwd()}" with path.open() as file_results: return json.load(file_results) +def convert_KTT_output_to_standard(output_filename: Path) -> dict: + with open(output_filename, 'r', encoding="utf-8") as fp: + ktt_output = json.load(fp) + + ktt_result_status_mapping = { + "Ok":"correct", + "ComputationFailed":"runtime", + "ValidationFailed":"correctness", + "CompilationFailed":"compile", + "DeviceLimitsExceeded":"runtime" + # timeout is marked as ComputationFailed in KTT + # constraints is marked as CompilationFailed in KTT + } + # map all timeunits to milliseconds + ktt_timeunit_mapping = { + "seconds": lambda x: x * 1000, 
+ "milliseconds": lambda x: x, + "microseconds": lambda x: x / 1000, + "nanoseconds": lambda x: x / 1000000, + } -def get_results_and_metadata( + converted_output = {} + + converted_output["schema_version"] = "1.0.0" + converted_output["results"] = [] + timemapper = ktt_timeunit_mapping[str(ktt_output["Metadata"]["TimeUnit"]).lower()] + + for ktt_result in ktt_output["Results"]: + converted_result = {} + converted_result["timestamp"] = ktt_output["Metadata"]["Timestamp"] + # note that KTT outputs each run separately, it does not merge the output for the same configuration + converted_result["configuration"] = {} + for tp in ktt_result["Configuration"]: + converted_result["configuration"][tp["Name"]] = tp["Value"] + # TODO PowerUsage also possible + converted_result["objectives"] = ["TotalDuration"] + converted_result["times"] = {} + # compilation time can be also calculated as sum of "Overhead" in all ComputationResults, it's just easier to do it this way in case of multiple kernel functions within one application + converted_result["times"]["compilation_time"] = timemapper( + ktt_result["TotalOverhead"] - + ktt_result["DataMovementOverhead"] - + ktt_result["SearcherOverhead"] - + ktt_result["ValidationOverhead"] + ) + converted_result["times"]["runtimes"] = [timemapper(ktt_result["TotalDuration"])] + converted_result["times"]["framework"] = timemapper(ktt_result["DataMovementOverhead"]) + converted_result["times"]["search_algorithm"] = timemapper(ktt_result["SearcherOverhead"]) + converted_result["times"]["validation"] = timemapper(ktt_result["ValidationOverhead"]) + # timeout, compile, runtime, correctness, constraints, correct + converted_result["invalidity"] = ktt_result_status_mapping[ktt_result["Status"]] + if ktt_result["Status"] == "ValidationFailed": + converted_result["correctness"] = 0 + else: + converted_result["correctness"] = 1 + converted_result["measurements"] = [] + converted_result["measurements"].append({ + "name": "TotalDuration", + "value": 
timemapper(ktt_result["TotalDuration"]), + "unit": "milliseconds" + }) + # TODO what do we want here in case of multiple ComputationResults for multiple kernel functions? + if "ProfilingData" in ktt_result["ComputationResults"][0]: + for pc in ktt_result["ComputationResults"][0]["ProfilingData"]["Counters"]: + converted_result["measurements"].append({"name":pc["Name"], "value":pc["Value"], "unit": ""}) + converted_output["results"].append(converted_result) + return converted_output + +def get_kerneltuner_results_and_metadata( filename_results: str = f"{folder}../last_run/_tune_configuration-results.json", filename_metadata: str = f"{folder}../last_run/_tune_configuration-metadata.json", ) -> tuple[list, list]: @@ -105,12 +171,11 @@ def get_results_and_metadata( def tune( - run_number: int, - kernel, - kernel_name: str, + input_file, + application_name: str, device_name: str, - strategy: dict, - tune_options: dict, + group: dict, + tune_options: dict, #TODO check if still necessary when we have input json file profiling: bool, searchspace_stats: SearchspaceStatistics, ) -> tuple[list, list, int]: @@ -119,11 +184,10 @@ def tune( Optionally collects profiling statistics. Args: - run_number: the run number (only relevant when importing). - kernel: the program (kernel) to tune. - kernel_name: the name of the program to tune. + input_file: the json input file for tuning the application. + application_name: the name of the program to tune. device_name: the device (GPU) to tune on. - strategy: the optimization algorithm to optimize with. + group: the experimental group (usually the search method). tune_options: a special options dictionary passed along to the autotuning framework. profiling: whether profiling statistics should be collected. searchspace_stats: a ``SearchspaceStatistics`` object passed to convert imported runs. 
@@ -156,7 +220,7 @@ def tune_with_kerneltuner(): ) if profiling: yappi.stop() - metadata, results = get_results_and_metadata( + metadata, results = get_kerneltuner_results_and_metadata( filename_results=kernel.file_path_results, filename_metadata=kernel.file_path_metadata ) # check that the number of iterations is correct @@ -180,144 +244,92 @@ def tune_with_BAT(): """Interface to tune with the BAT benchmarking suite.""" # TODO integrate with BAT - def import_from_KTT(use_param_mapping=True, use_bruteforce_objective=True): - """Import a KTT output file.""" - # import the file - assert import_runs_path.exists() and import_runs_path.is_dir() - expected_filename = ( - f"t~'ktt'd~'{device_name}'k~'{kernel_name}'s~'{strategy['strategy']}'r~{run_number}.json".lower() - ) - matching_runs: list[dict] = list() - for file in import_runs_path.iterdir(): - if file.name == expected_filename: - matching_runs.append(load_json(file)) - if len(matching_runs) < 1: - raise FileNotFoundError(f"No files to import found with name '{expected_filename}'") - if len(matching_runs) > 1: - raise FileExistsError( - f"{len(matching_runs)} files exist with name '{expected_filename}', there can be only one" + def tune_with_KTT(): + """Interface with KTT to tune the kernel and return the results.""" + if profiling: + yappi.set_clock_type("cpu") + yappi.start() + # run KttTuningLauncher with input file + # change the directory to application folder + # TODO check if changing the directory is necessary, I think it was just looking for cu file, which is not actually necessary in simulated execution + with temporary_working_directory_change(group["application_folder"]): + # copy the modified input file (with inserted search method, budget, etc.) 
+ subprocess.run( + ["cp", str(group["input_file"]), str(group["application_folder"])], + check=False ) - run = matching_runs[0] - - # map all timeunits to miliseconds - ktt_timeunit_mapping = { - "seconds": lambda x: x * 1000, - "miliseconds": lambda x: x, - "microseconds": lambda x: x / 1000, - } - ktt_status_mapping = { - "ok": "correct", - "devicelimitsexceeded": "compile", - "computationfailed": "runtime", - } - - # convert to the T4 format - metadata = None # TODO implement the metadata conversion when necessary - results = list() - run_metadata: dict = run["Metadata"] - run_results: list[dict] = run["Results"] - timemapper = ktt_timeunit_mapping[str(run_metadata["TimeUnit"]).lower()] + try: + # execute KttTuningLauncher from autotuner_path directory + executable = Path(group["autotuner_path"]).joinpath("KttTuningLauncher") + if group.get("set_this_to_pythonpath") is None: + proc_result = subprocess.run([str(executable), group["input_file"].name], + capture_output=True, check=True, + env = os.environ | {'PYTHONPATH':group["autotuner_path"]} + ) + else: + subprocess.run([str(executable), group["input_file"].name], + capture_output=True, check=True, + env = os.environ | {'PYTHONPATH':group["set_this_to_pythonpath"]} + ) + + # TODO this is a bug in KTT, sometimes it returns non-zero exit code even though nothing bad happened + # catching the expcetion here then covers even the situation when KTT fails, but I write the output just to let the user know what is going on if there is a runtime error + except subprocess.CalledProcessError as er: + print(er.stdout) + print(er.stderr) + pass + # remove the modified input file, output file was written in experiment_parent_folder/run/group_name/ + subprocess.run(["rm", group["input_file"].name], check=False) + if profiling: + yappi.stop() + metadata, results, total_time_ms = get_KTT_results_and_metadata(group["output_file"]) + if "max_fevals" in group["budget"]: + max_fevals = group["budget"]["max_fevals"] + if 
len(results) < max_fevals * 0.1: + warnings.warn( + f"Much fewer configurations were returned ({len(results)}) than the requested {max_fevals}" + ) + if len(results) < 2: + raise ValueError("Less than two configurations were returned") + return metadata, results, total_time_ms + + def get_KTT_results_and_metadata( + output_filename: str + #use_param_mapping=True + ) -> tuple[dict, list, float] : + # TODO not sure what use_param_mapping was for + """Retrieves results from KTT run. + + Args: + output_filename: file with KTT output + use_param_mapping: used in testing? + + Returns: + A tuple, a dictionary with metadata, a list of results and a float with total experiment duration in ms. + """ + # convert the KTT-formatted file to dictionary corresponding to standard json format + run_output = convert_KTT_output_to_standard(output_filename) + + metadata: dict = {} + results: list[dict] = run_output["results"] + total_time_ms = 0 - for config_attempt in run_results: - - # convert the configuration to T4 style dictionary for fast lookups in the mapping - configuration_ktt = dict() - for param in config_attempt["Configuration"]: - configuration_ktt[param["Name"]] = param["Value"] - - # convert the configuration data with the mapping in the correct order - configuration = dict() - if use_param_mapping and kernel_name in ktt_param_mapping: - param_map = ktt_param_mapping[kernel_name] - assert len(param_map) == len( - configuration_ktt - ), f"Mapping provided for {len(param_map)} params, but configuration has {len(configuration_ktt)}" - for param_name, mapping in param_map.items(): - param_value = configuration_ktt[param_name] - # if the mapping is None, do not include the parameter - if mapping is None: - pass - # if the mapping is a tuple, the first argument is the new parameter name and the second the value - elif isinstance(mapping, tuple): - param_mapped_name, param_mapped_value = mapping - if callable(param_mapped_value): - param_mapped_value = 
param_mapped_value(param_value) - configuration[param_mapped_name] = param_mapped_value - # if it's a list of tuples, map to multiple parameters - elif isinstance(mapping, list): - for param_mapped_name, param_mapped_value in mapping: - if callable(param_mapped_value): - param_mapped_value = param_mapped_value(param_value) - configuration[param_mapped_name] = param_mapped_value - else: - raise ValueError(f"Can not apply parameter mapping of {type(mapping)} ({mapping})") - else: - configuration = configuration_ktt + for result in results: # add to total time - total_duration = timemapper(config_attempt["TotalDuration"]) - total_overhead = timemapper(config_attempt["TotalOverhead"]) + total_duration = 0 + for m in result["measurements"]: + if m["name"] is "TotalDuration": + total_duration = m["value"] + break + total_overhead = result["times"]["compilation_time"] + result["times"]["framework"] + result["times"]["search_algorithm"] + result["times"]["validation"] total_time_ms += total_duration + total_overhead - # convert the times data - times_runtimes = [] - duration = "" - if len(config_attempt["ComputationResults"]) > 0: - for config_result in config_attempt["ComputationResults"]: - times_runtimes.append(timemapper(config_result["Duration"])) - if use_bruteforce_objective: - config_string_key = ",".join(str(x) for x in configuration.values()) - duration = searchspace_stats.get_value_in_config(config_string_key, "time") - else: - duration = np.mean(times_runtimes) - assert ( - "iterations" in strategy - ), "For imported KTT runs, the number of iterations must be specified in the experiments file" - if strategy["iterations"] != len(times_runtimes): - times_runtimes = [np.mean(times_runtimes)] * strategy["iterations"] - warnings.warn( - f"The specified number of iterations ({strategy['iterations']}) did not equal" - + f"the actual number of iterations ({len(times_runtimes)}). " - + "The average has been used." 
- ) - if (not isinstance(duration, (float, int, np.number))) or np.isnan(duration): - duration = "" - times_search_algorithm = timemapper(config_attempt.get("SearcherOverhead", 0)) - times_validation = timemapper(config_attempt.get("ValidationOverhead", 0)) - times_framework = timemapper(config_attempt.get("DataMovementOverhead", 0)) - times_benchmark = total_duration - times_compilation = total_overhead - times_search_algorithm - times_validation - times_framework - - # assemble the converted data - converted = { - "configuration": configuration, - "invalidity": ktt_status_mapping[str(config_attempt["Status"]).lower()], - "correctness": 1, - "measurements": [ - { - "name": "time", - "value": duration, - "unit": "ms", - } - ], - "objectives": ["time"], - "times": { - "compilation": times_compilation, - "benchmark": times_benchmark, - "framework": times_framework, - "search_algorithm": times_search_algorithm, - "validation": times_validation, - "runtimes": times_runtimes, - }, - } - results.append(converted) - return metadata, results, round(total_time_ms) - strategy_name = str(strategy["name"]).lower() - if strategy_name.startswith("ktt_"): - metadata, results, total_time_ms = import_from_KTT() - elif strategy_name.startswith("kerneltuner_") or True: + if group["autotuner"] == "KTT": + metadata, results, total_time_ms = tune_with_KTT() + elif group["autotuner"] == "KernelTuner": total_start_time = python_time.perf_counter() warnings.simplefilter("ignore", UserWarning) try: @@ -329,14 +341,14 @@ def import_from_KTT(use_param_mapping=True, use_bruteforce_objective=True): total_end_time = python_time.perf_counter() total_time_ms = round((total_end_time - total_start_time) * 1000) else: - raise ValueError(f"Invalid autotuning framework '{strategy_name}'") + raise ValueError(f"Invalid autotuning framework '{group['autotuner']}'") # be careful not to rely on total_time_ms when profiling, because it will include profiling time return metadata, results, total_time_ms def 
collect_results( - kernel, - strategy: dict, + input_file, + group: dict, results_description: ResultsDescription, searchspace_stats: SearchspaceStatistics, profiling: bool, @@ -344,33 +356,33 @@ def collect_results( """Executes optimization algorithms on tuning problems to capture their behaviour. Args: - kernel: the program (kernel) to tune. - strategy: the optimization algorithm to optimize with. - searchspace_stats: the ``SearchspaceStatistics`` object, only used for conversion of imported runs. + input_file: an input json file to tune. + group: a dictionary with settings for experimental group. results_description: the ``ResultsDescription`` object to write the results to. + searchspace_stats: the ``SearchspaceStatistics`` object, only used for conversion of imported runs. profiling: whether profiling statistics must be collected. Returns: The ``ResultsDescription`` object with the results. """ - min_num_evals: int = strategy["minimum_number_of_evaluations"] + min_num_evals: int = group["minimum_number_of_valid_search_iterations"] # TODO put the tune options in the .json in strategy_defaults? 
Make it Kernel Tuner independent tune_options = {"verbose": False, "quiet": True, "simulation_mode": True} - def report_multiple_attempts(rep: int, len_res: int, strategy_repeats: int): + def report_multiple_attempts(rep: int, len_res: int, group_repeats: int): """If multiple attempts are necessary, report the reason.""" if len_res < 1: - print(f"({rep+1}/{strategy_repeats}) No results found, trying once more...") + print(f"({rep+1}/{group_repeats}) No results found, trying once more...") elif len_res < min_num_evals: print(f"Too few results found ({len_res} of {min_num_evals} required), trying once more...") else: - print(f"({rep+1}/{strategy_repeats}) Only invalid results found, trying once more...") + print(f"({rep+1}/{group_repeats}) Only invalid results found, trying once more...") - # repeat the strategy as specified - repeated_results = list() + # repeat the run as specified + repeated_results = [] total_time_results = np.array([]) for rep in progressbar.progressbar( - range(strategy["repeats"]), + range(group["repeats"]), redirect_stdout=True, prefix=" | - |-> running: ", widgets=[ @@ -389,13 +401,12 @@ def report_multiple_attempts(rep: int, len_res: int, strategy_repeats: int): len_res: int = -1 while only_invalid or len_res < min_num_evals: if attempt > 0: - report_multiple_attempts(rep, len_res, strategy["repeats"]) - metadata, results, total_time_ms = tune( - rep, - kernel, - results_description.kernel_name, + report_multiple_attempts(rep, len_res, group["repeats"]) + _, results, total_time_ms = tune( + input_file, + results_description.application_name, results_description.device_name, - strategy, + group, tune_options, profiling, searchspace_stats, @@ -413,7 +424,7 @@ def report_multiple_attempts(rep: int, len_res: int, strategy_repeats: int): if profiling: stats = yappi.get_func_stats() # stats.print_all() - path = "../old_experiments/profilings/random/profile-v2.prof" + path = results_description.run_folder + "/profile-v2.prof" stats.save(path, 
type="pstat") # pylint: disable=no-member yappi.clear_stats() @@ -447,7 +458,9 @@ def get_nan_array() -> np.ndarray: objective_performance_results = get_nan_array() objective_performance_best_results = get_nan_array() objective_performance_stds = get_nan_array() - objective_time_results_per_key = np.full((len(objective_time_keys), max_num_evals, len(repeated_results)), np.nan) + objective_time_results_per_key = np.full( + (len(objective_time_keys), max_num_evals, len(repeated_results)), np.nan + ) objective_performance_results_per_key = np.full( (len(objective_time_keys), max_num_evals, len(repeated_results)), np.nan ) @@ -470,15 +483,19 @@ def get_nan_array() -> np.ndarray: # TODO continue here with implementing switch in output format # obtain the objective time per key - objective_times_list = list() + objective_times_list = [] for key_index, key in enumerate(objective_time_keys): evaluation_times = evaluation["times"] assert ( key in evaluation_times ), f"Objective time key {key} not in evaluation['times'] ({evaluation_times})" - value = evaluation_times[key] + if isinstance(evaluation_times[key], list): + # this happens when runtimes are in objective_time_keys + value = sum(evaluation_times[key]) + else: + value = evaluation_times[key] if value is not None and not is_invalid_objective_time(value): - value = value / 1000 # TODO this miliseconds to seconds conversion is specific to Kernel Tuner + #value = value / 1000 # TODO this miliseconds to seconds conversion is specific to Kernel Tuner objective_time_results_per_key[key_index, evaluation_index, repeat_index] = value objective_times_list.append(value) # sum the objective times of the keys @@ -489,7 +506,7 @@ def get_nan_array() -> np.ndarray: objective_time_results[evaluation_index, repeat_index] = cumulative_objective_time # obtain the objective performance per key (called 'measurements' in the T4 format) - objective_performances_list = list() + objective_performances_list = [] for key_index, key in 
enumerate(objective_performance_keys): evaluation_measurements = evaluation["measurements"] measurements = list(filter(lambda m: m["name"] == key, evaluation_measurements)) From aad790db8dbe3e0c5afd295c5fb0f6b407e7e9fa Mon Sep 17 00:00:00 2001 From: Jana Hozzova Date: Wed, 7 Aug 2024 13:41:39 +0200 Subject: [PATCH 005/234] Update searchspace_statistics.py to work with a common output json schema --- .../searchspace_statistics.py | 118 +++++++++--------- 1 file changed, 61 insertions(+), 57 deletions(-) diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index 9fe2c06..0cb6fcf 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -5,6 +5,7 @@ import json from math import ceil, floor from pathlib import Path +import matplotlib.pyplot as plt import numpy as np @@ -28,7 +29,7 @@ def nansumwrapper(array: np.ndarray, **kwargs) -> np.ndarray: class SearchspaceStatistics: - """Object for obtaining information from a raw, brute-forced cache file.""" + """Object for obtaining information from a full search space file.""" size: int repeats: int @@ -43,8 +44,8 @@ class SearchspaceStatistics: objective_performances_total_sorted_nan: np.ndarray T4_time_keys_to_kernel_tuner_time_keys_mapping = { - "compilation": "compile_time", - "benchmark": "benchmark_time", + "compilation_time": "compile_time", + "runtimes": "benchmark_time", "framework": "framework_time", "search_algorithm": "strategy_time", "validation": "verification_time", @@ -55,30 +56,30 @@ class SearchspaceStatistics: def __init__( self, - kernel_name: str, + application_name: str, device_name: str, minimization: bool, objective_time_keys: list[str], objective_performance_keys: list[str], - bruteforced_caches_path=Path("cached_data_used/cachefiles"), + full_search_space_file_path: str, ) -> None: """Initialization method for a Searchspace statistics object. 
Args: - kernel_name: the name of the kernel. + application_name: the name of the application (kernel). device_name: the name of the device (GPU) used. minimization: whether the optimization algorithm was minimizing. objective_time_keys: the objective time keys used. objective_performance_keys: the objective performance keys used. - bruteforced_caches_path: the path to the bruteforced caches. + full_search_space_file_path: the path to the full search space file. """ self.loaded = False - self.kernel_name = kernel_name + self.application_name = application_name self.device_name = device_name self.minimization = minimization - self.objective_time_keys = self.T4_time_keys_to_kernel_tuner_time_keys(objective_time_keys) + self.objective_time_keys = objective_time_keys self.objective_performance_keys = objective_performance_keys - self.bruteforced_caches_path = bruteforced_caches_path + self.full_search_space_file_path = full_search_space_file_path # load the data into the arrays self.loaded = self._load() @@ -131,9 +132,8 @@ def plot_histogram(self, cutoff_percentile: float): cutoff_percentile: the desired cutoff percentile to reach before stopping. 
""" # prepare plot - import matplotlib.pyplot as plt - fig, axs = plt.subplots(1, 1, sharey=True, tight_layout=True) + _, axs = plt.subplots(1, 1, sharey=True, tight_layout=True) if not isinstance(axs, list): axs = [axs] @@ -220,16 +220,6 @@ def cutoff_point_fevals_time(self, cutoff_percentile: float) -> tuple[float, int cutoff_point_time = cutoff_point_fevals * self.total_time_median() return cutoff_point_value, cutoff_point_fevals, cutoff_point_time - def _get_filepath(self, lowercase=True) -> Path: - """Returns the filepath.""" - kernel_directory = self.kernel_name - if lowercase: - kernel_directory = kernel_directory.lower() - filename = f"{self.device_name}.json" - if lowercase: - filename = filename.lower() - return self.bruteforced_caches_path / kernel_directory / filename - def get_valid_filepath(self) -> Path: """Returns the filepath to the Searchspace statistics .json file if it exists. @@ -239,9 +229,9 @@ def get_valid_filepath(self) -> Path: Returns: Filepath to the Searchspace statistics .json file. 
""" - filepath = self._get_filepath() + filepath = self.full_search_space_file_path if not filepath.exists(): - filepath = self._get_filepath(lowercase=False) + filepath = Path(str(self.full_search_space_file_path) + ".json") if not filepath.exists(): # if the file is not found, raise an error from os import getcwd @@ -252,7 +242,7 @@ def get_valid_filepath(self) -> Path: return filepath def _is_not_invalid_value(self, value, performance: bool) -> bool: - """Checks if a cache performance or time value is an array or is not invalid.""" + """Checks if a performance or time value is an array or is not invalid.""" if isinstance(value, str): return False if isinstance(value, (list, tuple, np.ndarray)): @@ -260,16 +250,30 @@ def _is_not_invalid_value(self, value, performance: bool) -> bool: invalid_check_function = is_invalid_objective_performance if performance else is_invalid_objective_time return not invalid_check_function(value) - def _to_valid_array(self, cache_values: list[dict], key: str, performance: bool) -> np.ndarray: - """Convert valid cache performance or time values to a numpy array, sum if the input is a list of arrays.""" + def _to_valid_array(self, results: list[dict], key: str, performance: bool) -> np.ndarray: + """Convert results performance or time values to a numpy array, sum if the input is a list of arrays.""" # make a list of all valid values - values = list( - v[key] if key in v and self._is_not_invalid_value(v[key], performance) else np.nan for v in cache_values - ) + if performance: + values = list() + for r in results: + for m in r["measurements"]: + if key == m["name"]: + if self._is_not_invalid_value(m["value"], performance): + values.append(m["value"]) + else: + values.append(np.nan) + else : + values = list( + v["times"][key] + if key in v["times"] and self._is_not_invalid_value(v["times"][key], performance) + else np.nan + for v in results + ) + # TODO other that time, performance such as power usage are in results["measurements"]. 
or not? # check if there are values that are arrays for value_index, value in enumerate(values): if isinstance(value, (list, tuple, np.ndarray)): - # if the cache value is an array, sum the valid values + # if the value is an array, sum the valid values array = value list_to_sum = list(v for v in array if self._is_not_invalid_value(v, performance)) values[value_index] = ( @@ -281,10 +285,12 @@ def _to_valid_array(self, cache_values: list[dict], key: str, performance: bool) return np.array(values) def _load(self) -> bool: - """Load the contents of the cache file.""" + """Load the contents of the full search space file.""" + # TODO check if the file is in KernelTuner format + # if not, use a script to create a file with values from KTT output and formatting of KernelTuner filepath = self.get_valid_filepath() with open(filepath, "r", encoding="utf-8") as fh: - print(f"Loading statistics for {filepath}...") + print(f"Loading full search space file {filepath} and initializing the statistics...") # get the cache from the .json file orig_contents = fh.read() try: @@ -296,24 +302,25 @@ def _load(self) -> bool: except json.decoder.JSONDecodeError: contents = orig_contents[:-2] + "}\n}" data = json.loads(contents) - cache: dict = data["cache"] - self.cache = cache + + results: dict = data["results"] + self.results = results # get the time values per configuration - cache_values = list(cache.values()) - self.size = len(cache_values) + self.size = len(data["results"]) self.objective_times = dict() for key in self.objective_time_keys: - self.objective_times[key] = self._to_valid_array(cache_values, key, performance=False) - self.objective_times[key] = ( - self.objective_times[key] / 1000 - ) # TODO Kernel Tuner specific miliseconds to seconds conversion + self.objective_times[key] = self._to_valid_array(results, key, performance=False) + #self.objective_times[key] = ( + # self.objective_times[key] / 1000 + #) # TODO Kernel Tuner specific miliseconds to seconds conversion + # in 
runner.convert_KTT_output_to_standard all times get converted to ms assert ( self.objective_times[key].ndim == 1 ), f"Should have one dimension, has {self.objective_times[key].ndim}" - assert self.objective_times[key].shape[0] == len( - cache_values - ), f"Should have the same size as cache_values ({self.size}), has {self.objective_times[key].shape[0]}" + assert ( + self.objective_times[key].shape[0] == self.size + ), f"Should have the same size as results ({self.size}), has {self.objective_times[key].shape[0]}" assert not np.all( np.isnan(self.objective_times[key]) ), f"""All values for {key=} are NaN. @@ -322,13 +329,13 @@ def _load(self) -> bool: # get the performance values per configuration self.objective_performances = dict() for key in self.objective_performance_keys: - self.objective_performances[key] = self._to_valid_array(cache_values, key, performance=True) + self.objective_performances[key] = self._to_valid_array(results, key, performance=True) assert ( self.objective_performances[key].ndim == 1 ), f"Should have one dimension, has {self.objective_performances[key].ndim}" - assert self.objective_performances[key].shape[0] == len( - cache_values - ), f"""Should have the same size as cache_values ({self.size}), + assert ( + self.objective_performances[key].shape[0] == self.size + ), f"""Should have the same size as results ({self.size}), has {self.objective_performances[key].shape[0]}""" assert not np.all( np.isnan(self.objective_performances[key]) @@ -336,10 +343,11 @@ def _load(self) -> bool: Likely the experiment did not collect performance values for objective_performance_key '{key}'.""" # get the number of repeats - valid_cache_index: int = 0 - while "times" not in cache_values[valid_cache_index]: - valid_cache_index += 1 - self.repeats = len(cache_values[valid_cache_index]["times"]) + # TODO is this necessary? 
number of repeats is given in experiments setup file + #valid_cache_index: int = 0 + #while "times" not in cache_values[valid_cache_index]: + # valid_cache_index += 1 + #self.repeats = len(cache_values[valid_cache_index]["times"]) # combine the arrays to the shape [len(objective_keys), self.size] self.objective_times_array = np.array(list(self.objective_times[key] for key in self.objective_time_keys)) @@ -389,10 +397,6 @@ def _load(self) -> bool: return True - def get_value_in_config(self, config: str, key: str): - """Get the value for a key given a configuration.""" - return self.cache[config][key] - def get_num_duplicate_values(self, value: float) -> int: """Get the number of duplicate values in the searchspace.""" duplicates = np.count_nonzero(np.where(self.objective_performances_total == value, 1, 0)) - 1 @@ -402,7 +406,7 @@ def get_num_duplicate_values(self, value: float) -> int: def mean_strategy_time_per_feval(self) -> float: """Gets the average time spent on the strategy per function evaluation.""" - if "strategy" in self.objective_times: + if "search_algorithm" in self.objective_times: strategy_times = self.objective_times invalid_mask = np.isnan(self.objective_performances_total) if not all(invalid_mask): From 77289b0bbd428833bd1400ee3752b32cf67df50f Mon Sep 17 00:00:00 2001 From: Jana Hozzova Date: Tue, 13 Aug 2024 09:46:27 +0200 Subject: [PATCH 006/234] Update visualization scripts regarding new schema However, not working. 
--- src/autotuning_methodology/curves.py | 8 +- .../report_experiments.py | 38 ++++------ .../visualize_experiments.py | 73 +++++++++---------- 3 files changed, 56 insertions(+), 63 deletions(-) diff --git a/src/autotuning_methodology/curves.py b/src/autotuning_methodology/curves.py index a839b4f..fc32058 100644 --- a/src/autotuning_methodology/curves.py +++ b/src/autotuning_methodology/curves.py @@ -230,10 +230,10 @@ def __init__(self, results_description: ResultsDescription) -> None: results_description: the ResultsDescription object containing the data for the Curve. """ # inputs - self.name = results_description.strategy_name - self.display_name = results_description.strategy_display_name + self.name = results_description.group_name + self.display_name = results_description.group_display_name self.device_name = results_description.device_name - self.kernel_name = results_description.kernel_name + self.application_name = results_description.application_name self.stochastic = results_description.stochastic self.minimization = results_description.minimization @@ -265,7 +265,7 @@ def check_attributes(self) -> None: assert isinstance(self.name, str) assert isinstance(self.display_name, str) assert isinstance(self.device_name, str) - assert isinstance(self.kernel_name, str) + assert isinstance(self.application_name, str) assert isinstance(self.stochastic, bool) assert isinstance(self._x_fevals, np.ndarray) assert isinstance(self._x_time, np.ndarray) diff --git a/src/autotuning_methodology/report_experiments.py b/src/autotuning_methodology/report_experiments.py index 2f64de0..a391c89 100644 --- a/src/autotuning_methodology/report_experiments.py +++ b/src/autotuning_methodology/report_experiments.py @@ -14,22 +14,22 @@ from autotuning_methodology.searchspace_statistics import SearchspaceStatistics -def get_aggregation_data_key(gpu_name: str, kernel_name: str): +def get_aggregation_data_key(gpu_name: str, application_name: str): """Utility function to get the key for 
data in the aggregation data dictionary. Args: gpu_name: the GPU name - kernel_name: the kernel name Returns: The key as a string. """ - return f"{gpu_name}+{kernel_name}" + return f"{gpu_name}+{application_name}" def get_aggregation_data( experiment_folderpath: Path, experiment: dict, + searchspace_statistics: dict[str, dict[str, SearchspaceStatistics]], strategies: dict, results_descriptions: dict, cutoff_percentile: float, @@ -61,23 +61,16 @@ def get_aggregation_data( time_resolution = int(time_resolution) aggregation_data: dict[str, tuple[Baseline, list[Curve], SearchspaceStatistics, np.ndarray]] = dict() - for gpu_name in experiment["GPUs"]: - for kernel_name in experiment["kernels"]: + for gpu_name in experiment["experimental_groups_defaults"]["gpus"]: + for application_name in experiment["experimental_groups_defaults"]["applications_names"]: # get the statistics - searchspace_stats = SearchspaceStatistics( - kernel_name=kernel_name, - device_name=gpu_name, - minimization=minimization, - objective_time_keys=experiment["objective_time_keys"], - objective_performance_keys=experiment["objective_performance_keys"], - bruteforced_caches_path=experiment_folderpath / experiment["bruteforced_caches_path"], - ) + searchspace_stats = searchspace_statistics[gpu_name][application_name] # get the cached strategy results as curves strategies_curves: list[Curve] = list() baseline_executed_strategy = None for strategy in strategies: - results_description = results_descriptions[gpu_name][kernel_name][strategy["name"]] + results_description = results_descriptions[gpu_name][application_name][strategy["name"]] if results_description is None: raise ValueError( f"""Strategy {strategy['display_name']} not in results_description, @@ -108,7 +101,7 @@ def get_aggregation_data( ) # collect aggregatable data - aggregation_data[get_aggregation_data_key(gpu_name, kernel_name)] = tuple( + aggregation_data[get_aggregation_data_key(gpu_name, application_name)] = tuple( [random_baseline, 
strategies_curves, searchspace_stats, time_range, fevals_range] ) @@ -203,20 +196,21 @@ def get_strategy_scores(experiment_filepath: str, use_strategy_as_baseline=None) a dictionary of the strategies, with the performance score and error for each strategy. """ # execute the experiment if necessary, else retrieve it - experiment, strategies, results_descriptions = execute_experiment(experiment_filepath, profiling=False) - experiment_folderpath = Path(experiment_filepath).parent + experiment, strategies, searchspace_statistics, results_descriptions = execute_experiment(experiment_filepath, profiling=False) + experiment_folderpath = experiment["parent_folder_absolute_path"] # get the settings - minimization: bool = experiment.get("minimization", True) - cutoff_percentile: float = experiment["cutoff_percentile"] - cutoff_percentile_start: float = experiment.get("cutoff_percentile_start", 0.01) - time_resolution: float = experiment.get("resolution", 1e4) - confidence_level: float = experiment["plot"].get("confidence_level", 0.95) + minimization: bool = experiment["statistics_settings"]["minimization"] + cutoff_percentile: float = experiment["statistics_settings"]["cutoff_percentile"] + cutoff_percentile_start: float = experiment["statistics_settings"]["cutoff_percentile_start"] + time_resolution: float = experiment["visualization_settings"]["resolution"] + confidence_level: float = experiment["visualization_settings"]["confidence_level"] # aggregate the data aggregation_data = get_aggregation_data( experiment_folderpath, experiment, + searchspace_statistics, strategies, results_descriptions, cutoff_percentile, diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index a2ee993..5cd28bd 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -127,8 +127,6 @@ class Visualize: "baseline", ] # absolute values, scatterplot, median-absolute 
normalized, improvement over baseline - plot_filename_prefix_parent = "generated_plots" - def __init__( self, experiment_filepath: str, @@ -154,44 +152,44 @@ def __init__( # # silently execute the experiment # with warnings.catch_warnings(): # warnings.simplefilter("ignore") - self.experiment, self.strategies, self.results_descriptions = execute_experiment( + self.experiment, self.all_experimental_groups, self.searchspace_statistics, self.results_descriptions = execute_experiment( experiment_filepath, profiling=False ) - experiment_folderpath = Path(experiment_filepath).parent - experiment_folder_id: str = self.experiment["folder_id"] - assert isinstance(experiment_folder_id, str) and len(experiment_folder_id) > 0 - self.plot_filename_prefix = f"{self.plot_filename_prefix_parent}/{experiment_folder_id}/" + experiment_folder: Path = self.experiment["parent_folder_absolute_path"] + assert isinstance(experiment_folder, Path) + self.plot_filename_prefix = experiment_folder.joinpath("run", "generated_graphs") print("\n") print("Visualizing") # preparing filesystem if save_figs or save_extra_figs: - Path(self.plot_filename_prefix_parent).mkdir(exist_ok=True) Path(self.plot_filename_prefix).mkdir(exist_ok=True) + # search strategies are search methods defined in experiments setup file + # self.all_experimental_groups are all combinations of gpu+application+search method that got executed + self.strategies = self.experiment["search_strategies"] # settings - self.minimization: bool = self.experiment.get("minimization", True) - cutoff_percentile: float = self.experiment["cutoff_percentile"] - cutoff_percentile_start: float = self.experiment.get("cutoff_percentile_start", 0.01) - cutoff_type: str = self.experiment.get("cutoff_type", "fevals") + self.minimization: bool = self.experiment["statistics_settings"]["minimization"] + cutoff_percentile: float = self.experiment["statistics_settings"]["cutoff_percentile"] + cutoff_percentile_start: float = 
self.experiment["statistics_settings"]["cutoff_percentile_start"] + cutoff_type: str = self.experiment["statistics_settings"]["cutoff_type"] assert cutoff_type == "fevals" or cutoff_type == "time", f"cutoff_type != 'fevals' or 'time', is {cutoff_type}" - time_resolution: float = self.experiment.get("resolution", 1e4) + time_resolution: float = self.experiment["visualization_settings"]["resolution"] if int(time_resolution) != time_resolution: raise ValueError(f"The resolution must be an integer, yet is {time_resolution}.") time_resolution = int(time_resolution) - objective_time_keys: list[str] = self.experiment["objective_time_keys"] + objective_time_keys: list[str] = self.experiment["statistics_settings"]["objective_time_keys"] # plot settings - plot_settings: dict = self.experiment["plot"] - plot_x_value_types: list[str] = plot_settings["plot_x_value_types"] - plot_y_value_types: list[str] = plot_settings["plot_y_value_types"] - compare_baselines: bool = plot_settings.get("compare_baselines", False) - compare_split_times: bool = plot_settings.get("compare_split_times", False) - confidence_level: float = plot_settings.get("confidence_level", 0.95) + plot_x_value_types: list[str] = self.experiment["visualization_settings"]["x_axis_value_types"] + plot_y_value_types: list[str] = self.experiment["visualization_settings"]["y_axis_value_types"] + compare_baselines: bool = self.experiment["visualization_settings"]["compare_baselines"] + compare_split_times: bool = self.experiment["visualization_settings"]["compare_split_times"] + confidence_level: float = self.experiment["visualization_settings"]["confidence_level"] self.colors = get_colors( self.strategies, - scale_margin_left=plot_settings.get("color_parent_scale_margin_left", 0.4), - scale_margin_right=plot_settings.get("color_parent_scale_margin_right", 0.1), + scale_margin_left=self.experiment["visualization_settings"].get("color_parent_scale_margin_left", 0.4), + 
scale_margin_right=self.experiment["visualization_settings"].get("color_parent_scale_margin_right", 0.1), ) self.plot_skip_strategies: list[str] = list() if use_strategy_as_baseline is not None: @@ -199,8 +197,9 @@ def __init__( # visualize aggregation_data = get_aggregation_data( - experiment_folderpath, + experiment_folder, self.experiment, + self.searchspace_statistics, self.strategies, self.results_descriptions, cutoff_percentile, @@ -210,15 +209,15 @@ def __init__( time_resolution, use_strategy_as_baseline, ) - for gpu_name in self.experiment["GPUs"]: - for kernel_name in self.experiment["kernels"]: - print(f" | visualizing optimization of {kernel_name} for {gpu_name}") - title = f"{kernel_name} on {gpu_name}" + for gpu_name in self.experiment["experimental_groups_defaults"]["gpus"]: + for application_name in self.experiment["experimental_groups_defaults"]["applications_names"]: + print(f" | visualizing optimization of {application_name} for {gpu_name}") + title = f"{application_name} on {gpu_name}" title = title.replace("_", " ") # unpack the aggregation data random_baseline, strategies_curves, searchspace_stats, time_range, fevals_range = aggregation_data[ - get_aggregation_data_key(gpu_name=gpu_name, kernel_name=kernel_name) + get_aggregation_data_key(gpu_name=gpu_name, application_name=application_name) ] # baseline_time_interpolated = np.linspace(mean_feval_time, cutoff_point_time, time_resolution) @@ -319,7 +318,7 @@ def __init__( searchspace_stats, strategies_curves, x_axis_range, - plot_settings, + self.experiment["visualization_settings"], random_baseline, baselines_extra=baselines_extra, ) @@ -331,7 +330,7 @@ def __init__( fig.supxlabel(self.get_x_axis_label(x_type, objective_time_keys)) fig.tight_layout() if save_figs: - filename = f"{self.plot_filename_prefix}{title}_{x_type}" + filename = f"{self.plot_filename_prefix}/{title}_{x_type}" filename = filename.replace(" ", "_") fig.savefig(filename, dpi=300) print(f"Figure saved to {filename}") @@ 
-349,17 +348,17 @@ def __init__( ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. if not hasattr(axs, "__len__"): axs = [axs] - title = f"""Aggregated Data\nkernels: - {', '.join(self.experiment['kernels'])}\nGPUs: {', '.join(self.experiment['GPUs'])}""" + title = f"""Aggregated Data\napplications: + {', '.join(self.experiment['experimental_groups_defaults']['applications_names'])}\nGPUs: {', '.join(self.experiment['experimental_groups_defaults']['gpus'])}""" fig.canvas.manager.set_window_title(title) if not save_figs: fig.suptitle(title) # finalize the figure and save or display it - self.plot_strategies_aggregated(axs[0], aggregation_data, plot_settings=plot_settings) + self.plot_strategies_aggregated(axs[0], aggregation_data, plot_settings=self.experiment["visualization_settings"]) fig.tight_layout() if save_figs: - filename = f"{self.plot_filename_prefix}aggregated" + filename = f"{self.plot_filename_prefix}/aggregated" filename = filename.replace(" ", "_") fig.savefig(filename, dpi=300) print(f"Figure saved to {filename}") @@ -446,7 +445,7 @@ def plot_baselines_comparison( # write to file or show if save_fig: - filename = f"{self.plot_filename_prefix}{title}_baselines" + filename = f"{self.plot_filename_prefix}/{title}_baselines" filename = filename.replace(" ", "_") plt.savefig(filename, dpi=300) print(f"Figure saved to {filename}") @@ -533,7 +532,7 @@ def plot_split_times_comparison( # write to file or show if save_fig: - filename = f"{self.plot_filename_prefix}{title}_split_times_{x_type}" + filename = f"{self.plot_filename_prefix}/{title}_split_times_{x_type}" filename = filename.replace(" ", "_") plt.savefig(filename, dpi=300) print(f"Figure saved to {filename}") @@ -641,7 +640,7 @@ def plot_split_times_bar_comparison( # write to file or show if save_fig: - filename = f"{self.plot_filename_prefix}{title}_split_times_bar" + filename = f"{self.plot_filename_prefix}/{title}_split_times_bar" filename = filename.replace(" ", "_") 
plt.savefig(filename, dpi=300) print(f"Figure saved to {filename}") From 13a8ffc3b652f8c4afeb937a6667e764f734830c Mon Sep 17 00:00:00 2001 From: Jana Hozzova Date: Wed, 14 Aug 2024 15:04:51 +0200 Subject: [PATCH 007/234] Fix version number for kernel_tuner in pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 108a48d..7fdb7d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ dependencies = [ "progressbar2 >= 4.2.0", "jsonschema >= 4.17.3", "nonconformist >= 2.1.0", - "kernel_tuner >= 1.0.1", + "kernel_tuner >= 1.0.0", ] [project.optional-dependencies] From a28a240422b7b9f681c04c5e090ee3a206584f94 Mon Sep 17 00:00:00 2001 From: Jana Hozzova Date: Tue, 20 Aug 2024 13:42:39 +0200 Subject: [PATCH 008/234] Add example experiment files for KTT --- .../ktt_coulomb_two_autotuner_versions.json | 60 ++++++++++++++++ .../ktt_coulomb_two_search_methods.json | 69 +++++++++++++++++++ ...ktt_vectorAddition_two_search_methods.json | 69 +++++++++++++++++++ 3 files changed, 198 insertions(+) create mode 100644 experiment_files/ktt_coulomb_two_autotuner_versions.json create mode 100644 experiment_files/ktt_coulomb_two_search_methods.json create mode 100644 experiment_files/ktt_vectorAddition_two_search_methods.json diff --git a/experiment_files/ktt_coulomb_two_autotuner_versions.json b/experiment_files/ktt_coulomb_two_autotuner_versions.json new file mode 100644 index 0000000..e57af50 --- /dev/null +++ b/experiment_files/ktt_coulomb_two_autotuner_versions.json @@ -0,0 +1,60 @@ +{ + "version": "1.0.0", + "name": "Random vs. 
Random KTT 2.1 and KTT 2.2 on Coulomb", + "parent_folder": "/home/janka/autotuning_methodology_experiments/test_coulomb_two_versions_KTT", + "experimental_groups_defaults": { + "applications": [ + { + "name": "coulomb", + "input_file" : "/home/janka/KTT/Examples/CoulombSum3d/CoulombSum3dCudaScript.json", + "folder": "/home/janka/KTT/Examples/CoulombSum3d" + } + ], + "gpus": ["2080"], + "stochastic": true, + "repeats": 2, + "samples": 1, + "minimum_number_of_valid_search_iterations": 5, + "ignore_cache": true + }, + "search_strategies": [ + { + "name": "random-2.1", + "search_method": "Random", + "display_name": "Random with KTT 2.1", + "autotuner": "KTT", + "autotuner_path": "/home/janka/KTT/Build/x86_64_Release/" + }, + { + "name": "random-2.2", + "autotuner": "KTT", + "autotuner_path": "/home/janka/KTT-2.2/Build/x86_64_Release/", + "search_method": "Random", + "display_name": "Random with KTT 2.2" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.96, + "cutoff_percentile_start": 0.2, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "TotalDuration" + ] + }, + "visualization_settings": { + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "absolute" + ], + "resolution": 2, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} diff --git a/experiment_files/ktt_coulomb_two_search_methods.json b/experiment_files/ktt_coulomb_two_search_methods.json new file mode 100644 index 0000000..c83023b --- /dev/null +++ b/experiment_files/ktt_coulomb_two_search_methods.json @@ -0,0 +1,69 @@ +{ + "version": "1.0.0", + "name": "Random vs. 
profbased searcher KTT 2.1 on Coulomb", + "parent_folder": "/home/janka/autotuning_methodology_experiments/test_coulomb_two_methods", + "experimental_groups_defaults": { + "applications": [ + { + "name": "coulomb", + "input_file" : "/home/janka/KTT/Examples/CoulombSum3d/CoulombSum3dCudaScript.json", + "folder": "/home/janka/KTT/Examples/CoulombSum3d" + } + ], + "gpus": ["2080"], + "autotuner": "KTT", + "autotuner_path": "/home/janka/KTT/Build/x86_64_Release/", + "set_this_to_pythonpath": "/home/janka/KTT/Build/x86_64_Release/:/home/janka/KTT/Scripts", + "stochastic": true, + "repeats": 2, + "samples": 1, + "minimum_number_of_valid_search_iterations": 5, + "ignore_cache": true + }, + "search_strategies": [ + { + "name": "random", + "search_method": "Random", + "display_name": "Random" + }, + { + "name": "profbased", + "search_method": "ProfileBased", + "search_method_hyperparameters": [ + { + "name": "modelPath", + "value": "/home/janka/KTT/Examples/CoulombSum3d/Models/2080-coulomb_output_DT.sav" + }, + { + "name": "batchSize", + "value": "5" + } + ], + "display_name": "Profile-based searcher" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.96, + "cutoff_percentile_start": 0.2, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "TotalDuration" + ] + }, + "visualization_settings": { + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "absolute" + ], + "resolution": 2, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} diff --git a/experiment_files/ktt_vectorAddition_two_search_methods.json b/experiment_files/ktt_vectorAddition_two_search_methods.json new file mode 100644 index 0000000..25cf5d0 --- /dev/null +++ b/experiment_files/ktt_vectorAddition_two_search_methods.json @@ -0,0 +1,69 @@ +{ + "version": "1.0.0", + "name": "Random vs. 
profbased searcher KTT 2.1", + "parent_folder": "/home/janka/autotuning_methodology_experiments/test_vectorAddition_two_methods", + "experimental_groups_defaults": { + "applications": [ + { + "name": "vectorAddition", + "input_file" : "/home/janka/KTT/Tutorials/03KernelTuning/KernelTuningCudaScript.json", + "folder": "/home/janka/KTT/Tutorials/03KernelTuning/" + } + ], + "gpus": ["2080"], + "autotuner": "KTT", + "autotuner_path": "/home/janka/KTT/Build/x86_64_Release/", + "set_this_to_pythonpath": "/home/janka/KTT/Build/x86_64_Release:/home/janka/KTT/Scripts", + "stochastic": true, + "repeats": 2, + "samples": 1, + "minimum_number_of_valid_search_iterations": 2, + "ignore_cache": true + }, + "search_strategies": [ + { + "name": "random", + "search_method": "Random", + "display_name": "Random" + }, + { + "name": "profbased", + "search_method": "ProfileBased", + "search_method_hyperparameters": [ + { + "name": "modelPath", + "value": "/home/janka/KTT/Tutorials/03KernelTuning/2080-vectorAdd_output_DT.sav" + }, + { + "name": "batchSize", + "value": "1" + } + ], + "display_name": "Profile-based searcher" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.96, + "cutoff_percentile_start": 0.1, + "cutoff_type": "time", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "TotalDuration" + ] + }, + "visualization_settings": { + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "absolute" + ], + "resolution": 2, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} From 6810ededba94caea39ca8164e2e3e7e5bb3c414d Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Wed, 25 Sep 2024 12:41:53 -0700 Subject: [PATCH 009/234] Update --- .../methodology_paper_evaluation_2.json | 149 ++++++++++++++++++ src/autotuning_methodology/runner.py | 84 +++++----- 2 files changed, 191 insertions(+), 42 deletions(-) create mode 100644 
experiment_files/methodology_paper_evaluation_2.json diff --git a/experiment_files/methodology_paper_evaluation_2.json b/experiment_files/methodology_paper_evaluation_2.json new file mode 100644 index 0000000..1850e4c --- /dev/null +++ b/experiment_files/methodology_paper_evaluation_2.json @@ -0,0 +1,149 @@ +{ + "version": "1.0.0", + "name": "Methodology paper evaluation", + "parent_folder": "methodology_paper_evaluation", + "experimental_groups_defaults": { + "applications": [ + { + "name": "coulomb", + "input_file": "/home/janka/KTT/Examples/CoulombSum3d/CoulombSum3dCudaScript.json", + "folder": "/home/janka/KTT/Examples/CoulombSum3d" + }, + { + "name": "convolution", + "input_file": "/home/janka/KTT/Examples/CoulombSum3d/CoulombSum3dCudaScript.json", + "folder": "/home/janka/KTT/Examples/CoulombSum3d" + } + ], + "gpus": [ + "RTX_3090", + "RTX_2080_Ti" + ], + "stochastic": true, + "repeats": 100, + "samples": 32, + "minimum_number_of_valid_search_iterations": 20, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "genetic_algorithm", + "search_method": "genetic_algorithm", + "display_name": "Genetic Algorithm", + "autotuner": "KernelTuner" + }, + { + "name": "dual_annealing", + "search_method": "dual_annealing", + "display_name": "Dual Annealing", + "autotuner": "KernelTuner" + }, + { + "name": "greedy_ils", + "search_method": "greedy_ils", + "display_name": "Greedy ILS", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.96, + "cutoff_percentile_start": 0.5, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "time" + ] + }, + "visualization_settings": { + "x_axis_value_types": [ + "fevals", + "time", + "aggregated" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ], + "resolution": 1e3, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } + + + "kernels_path": 
"../cached_data_used/kernels", + "bruteforced_caches_path": "../cached_data_used/cachefiles", + "visualization_caches_path": "../cached_data_used/visualizations", + "kernels": [ + "convolution", + "pnpoly" + ], + // "GPUs": [ + // "RTX_3090", + // "RTX_2080_Ti" + // ], + // "minimization": true, + // "resolution": 1e3, + // "cutoff_percentile": 0.96, + // "cutoff_percentile_start": 0.5, + // "cutoff_type": "fevals", + // "objective_time_keys": [ + // "compilation", + // "benchmark", + // "framework", + // "search_algorithm", + // "validation" + // ], + // "objective_performance_keys": [ + // "time" + // ], + // "plot": { + // "plot_x_value_types": [ + // "fevals", + // "time", + // "aggregated" + // ], + // "plot_y_value_types": [ + // "normalized", + // "baseline" + // ], + // "confidence_level": 0.95, + // "compare_baselines": false, + // "compare_split_times": false + // }, + // "strategy_defaults": { + // "iterations": 32, + // "repeats": 100, + // "minimum_number_of_evaluations": 20, + // "stochastic": true, + "record_data": [ + "time", + "GFLOP/s" + ] + // }, + // "strategies": [ + // { + // "name": "genetic_algorithm", + // "strategy": "genetic_algorithm", + // "display_name": "Genetic Algorithm" + // }, + // { + // "name": "dual_annealing", + // "strategy": "dual_annealing", + // "display_name": "Dual Annealing" + // }, + // { + // "name": "greedy_ils", + // "strategy": "greedy_ils", + // "display_name": "Greedy ILS" + // }, + // { + // "name": "ktt_profile_searcher", + // "strategy": "profile_searcher", + // "display_name": "KTT Profile Searcher" + // } + // ] +} \ No newline at end of file diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index c75e2ec..c843b65 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -5,11 +5,11 @@ import contextlib import json import os +import subprocess import time as python_time import warnings from inspect import getfile from pathlib import 
Path -import subprocess import numpy as np import progressbar @@ -23,7 +23,7 @@ is_valid_config_result, ) -#TODO this does not conform to new intedned dicrectory structure +# TODO this does not conform to new intended dicrectory structure folder = Path(__file__).parent.parent.parent # Imported runs must be remapped to have the same keys, values and order of parameters as the other runs. @@ -85,16 +85,17 @@ def load_json(path: Path): with path.open() as file_results: return json.load(file_results) + def convert_KTT_output_to_standard(output_filename: Path) -> dict: - with open(output_filename, 'r', encoding="utf-8") as fp: + with open(output_filename, "r", encoding="utf-8") as fp: ktt_output = json.load(fp) ktt_result_status_mapping = { - "Ok":"correct", - "ComputationFailed":"runtime", - "ValidationFailed":"correctness", - "CompilationFailed":"compile", - "DeviceLimitsExceeded":"runtime" + "Ok": "correct", + "ComputationFailed": "runtime", + "ValidationFailed": "correctness", + "CompilationFailed": "compile", + "DeviceLimitsExceeded": "runtime", # timeout is marked as ComputationFailed in KTT # constraints is marked as CompilationFailed in KTT } @@ -124,10 +125,10 @@ def convert_KTT_output_to_standard(output_filename: Path) -> dict: converted_result["times"] = {} # compilation time can be also calculated as sum of "Overhead" in all ComputationResults, it's just easier to do it this way in case of multiple kernel functions within one application converted_result["times"]["compilation_time"] = timemapper( - ktt_result["TotalOverhead"] - - ktt_result["DataMovementOverhead"] - - ktt_result["SearcherOverhead"] - - ktt_result["ValidationOverhead"] + ktt_result["TotalOverhead"] + - ktt_result["DataMovementOverhead"] + - ktt_result["SearcherOverhead"] + - ktt_result["ValidationOverhead"] ) converted_result["times"]["runtimes"] = [timemapper(ktt_result["TotalDuration"])] converted_result["times"]["framework"] = timemapper(ktt_result["DataMovementOverhead"]) @@ -140,18 
+141,17 @@ def convert_KTT_output_to_standard(output_filename: Path) -> dict: else: converted_result["correctness"] = 1 converted_result["measurements"] = [] - converted_result["measurements"].append({ - "name": "TotalDuration", - "value": timemapper(ktt_result["TotalDuration"]), - "unit": "milliseconds" - }) + converted_result["measurements"].append( + {"name": "TotalDuration", "value": timemapper(ktt_result["TotalDuration"]), "unit": "milliseconds"} + ) # TODO what do we want here in case of multiple ComputationResults for multiple kernel functions? if "ProfilingData" in ktt_result["ComputationResults"][0]: for pc in ktt_result["ComputationResults"][0]["ProfilingData"]["Counters"]: - converted_result["measurements"].append({"name":pc["Name"], "value":pc["Value"], "unit": ""}) + converted_result["measurements"].append({"name": pc["Name"], "value": pc["Value"], "unit": ""}) converted_output["results"].append(converted_result) return converted_output + def get_kerneltuner_results_and_metadata( filename_results: str = f"{folder}../last_run/_tune_configuration-results.json", filename_metadata: str = f"{folder}../last_run/_tune_configuration-metadata.json", @@ -175,7 +175,7 @@ def tune( application_name: str, device_name: str, group: dict, - tune_options: dict, #TODO check if still necessary when we have input json file + tune_options: dict, # TODO check if still necessary when we have input json file profiling: bool, searchspace_stats: SearchspaceStatistics, ) -> tuple[list, list, int]: @@ -254,26 +254,28 @@ def tune_with_KTT(): # TODO check if changing the directory is necessary, I think it was just looking for cu file, which is not actually necessary in simulated execution with temporary_working_directory_change(group["application_folder"]): # copy the modified input file (with inserted search method, budget, etc.) 
- subprocess.run( - ["cp", str(group["input_file"]), str(group["application_folder"])], - check=False - ) + subprocess.run(["cp", str(group["input_file"]), str(group["application_folder"])], check=False) try: # execute KttTuningLauncher from autotuner_path directory executable = Path(group["autotuner_path"]).joinpath("KttTuningLauncher") if group.get("set_this_to_pythonpath") is None: - proc_result = subprocess.run([str(executable), group["input_file"].name], - capture_output=True, check=True, - env = os.environ | {'PYTHONPATH':group["autotuner_path"]} + subprocess.run( + [str(executable), group["input_file"].name], + capture_output=True, + check=True, + env=os.environ | {"PYTHONPATH": group["autotuner_path"]}, ) else: - subprocess.run([str(executable), group["input_file"].name], - capture_output=True, check=True, - env = os.environ | {'PYTHONPATH':group["set_this_to_pythonpath"]} + subprocess.run( + [str(executable), group["input_file"].name], + capture_output=True, + check=True, + env=os.environ | {"PYTHONPATH": group["set_this_to_pythonpath"]}, ) # TODO this is a bug in KTT, sometimes it returns non-zero exit code even though nothing bad happened - # catching the expcetion here then covers even the situation when KTT fails, but I write the output just to let the user know what is going on if there is a runtime error + # catching the exception here then covers even the situation when KTT fails, but I write the output + # just to let the user know what is going on if there is a runtime error except subprocess.CalledProcessError as er: print(er.stdout) print(er.stderr) @@ -293,16 +295,11 @@ def tune_with_KTT(): raise ValueError("Less than two configurations were returned") return metadata, results, total_time_ms - def get_KTT_results_and_metadata( - output_filename: str - #use_param_mapping=True - ) -> tuple[dict, list, float] : - # TODO not sure what use_param_mapping was for + def get_KTT_results_and_metadata(output_filename: str) -> tuple[dict, list, float]: 
"""Retrieves results from KTT run. Args: output_filename: file with KTT output - use_param_mapping: used in testing? Returns: A tuple, a dictionary with metadata, a list of results and a float with total experiment duration in ms. @@ -319,10 +316,15 @@ def get_KTT_results_and_metadata( # add to total time total_duration = 0 for m in result["measurements"]: - if m["name"] is "TotalDuration": + if m["name"] == "TotalDuration": total_duration = m["value"] break - total_overhead = result["times"]["compilation_time"] + result["times"]["framework"] + result["times"]["search_algorithm"] + result["times"]["validation"] + total_overhead = ( + result["times"]["compilation_time"] + + result["times"]["framework"] + + result["times"]["search_algorithm"] + + result["times"]["validation"] + ) total_time_ms += total_duration + total_overhead return metadata, results, round(total_time_ms) @@ -458,9 +460,7 @@ def get_nan_array() -> np.ndarray: objective_performance_results = get_nan_array() objective_performance_best_results = get_nan_array() objective_performance_stds = get_nan_array() - objective_time_results_per_key = np.full( - (len(objective_time_keys), max_num_evals, len(repeated_results)), np.nan - ) + objective_time_results_per_key = np.full((len(objective_time_keys), max_num_evals, len(repeated_results)), np.nan) objective_performance_results_per_key = np.full( (len(objective_time_keys), max_num_evals, len(repeated_results)), np.nan ) @@ -495,7 +495,7 @@ def get_nan_array() -> np.ndarray: else: value = evaluation_times[key] if value is not None and not is_invalid_objective_time(value): - #value = value / 1000 # TODO this miliseconds to seconds conversion is specific to Kernel Tuner + # value = value / 1000 # TODO this miliseconds to seconds conversion is specific to Kernel Tuner objective_time_results_per_key[key_index, evaluation_index, repeat_index] = value objective_times_list.append(value) # sum the objective times of the keys From 
25397e8b55ac9c8818fe138e09f8bd9e31607f2c Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Mon, 30 Sep 2024 23:45:05 -0700 Subject: [PATCH 010/234] Wrote conversion script to new experiments file --- experiment_files/convert_old_to_new_format.py | 84 +++++++++++++++++++ src/autotuning_methodology/schema.json | 5 +- 2 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 experiment_files/convert_old_to_new_format.py diff --git a/experiment_files/convert_old_to_new_format.py b/experiment_files/convert_old_to_new_format.py new file mode 100644 index 0000000..47d412b --- /dev/null +++ b/experiment_files/convert_old_to_new_format.py @@ -0,0 +1,84 @@ +# script to convert the old experiments file format into the new format +import json +from pathlib import Path + +from jsonschema import validate + +from autotuning_methodology.experiments import get_experiment_schema_filepath + +# set input and output files +folderpath = Path(__file__).parent +old_file_path = folderpath / Path("methodology_paper_evaluation.json") +new_file_path = folderpath / Path("methodology_paper_evaluation_new.json") +schema_path = Path(get_experiment_schema_filepath()) +encoding = "utf-8" +assert old_file_path.exists(), f"Old file does not exist at {old_file_path}" +assert not new_file_path.exists(), f"New file does already exists at {new_file_path}" +assert schema_path.exists(), f"Schema file does not exist at {schema_path}" + +# read input file to dictionary +with old_file_path.open("r", encoding=encoding) as fp: + old_experiment: dict = json.load(fp) + +# convert the dictionary to the new format +new_experiment = { + "version": "1.0.0", + "name": old_experiment["name"], + "parent_folder": f"./{old_experiment['folder_id']}", + "experimental_groups_defaults": { + "applications": [ + { + "name": kernel, + "input_file": f"{old_experiment['kernels_path']}/{kernel}", + "folder": f"{old_experiment['visualization_caches_path']}/{kernel}", + } + for kernel in old_experiment["kernels"] + 
], + "gpus": old_experiment["GPUs"], + "pattern_for_full_search_space_filenames": { + "regex": f"{old_experiment['bruteforced_caches_path']}/" + "${applications}/${gpus}.json" + }, + "stochastic": old_experiment["strategy_defaults"]["stochastic"], + "repeats": old_experiment["strategy_defaults"]["repeats"], + "samples": old_experiment["strategy_defaults"]["iterations"], + "minimum_number_of_valid_search_iterations": old_experiment["strategy_defaults"][ + "minimum_number_of_evaluations" + ], + "ignore_cache": False, + }, + "search_strategies": [ + { + "name": strategy["name"], + "search_method": strategy["strategy"], + "display_name": strategy["display_name"], + "autotuner": "KernelTuner", # Assuming autotuner is KernelTuner for all strategies + } + for strategy in old_experiment["strategies"] + if strategy["name"] != "ktt_profile_searcher" + ], + "statistics_settings": { + "minimization": old_experiment["minimization"], + "cutoff_percentile": old_experiment["cutoff_percentile"], + "cutoff_percentile_start": old_experiment["cutoff_percentile_start"], + "cutoff_type": old_experiment["cutoff_type"], + "objective_time_keys": ["all"], # Mapped to 'all' + "objective_performance_keys": old_experiment["objective_performance_keys"], + }, + "visualization_settings": { + "x_axis_value_types": old_experiment["plot"]["plot_x_value_types"], + "y_axis_value_types": old_experiment["plot"]["plot_y_value_types"], + "resolution": old_experiment["resolution"], + "confidence_level": old_experiment["plot"]["confidence_level"], + "compare_baselines": old_experiment["plot"]["compare_baselines"], + "compare_split_times": old_experiment["plot"]["compare_split_times"], + }, +} + +# validate using schema +with schema_path.open("r", encoding=encoding) as fp: + schema = json.load(fp) + validate(new_experiment, schema) + +# write converted dictionary to file +with new_file_path.open("w", encoding=encoding) as fp: + json.dump(new_experiment, fp) diff --git a/src/autotuning_methodology/schema.json 
b/src/autotuning_methodology/schema.json index c0c8deb..9ff24a6 100755 --- a/src/autotuning_methodology/schema.json +++ b/src/autotuning_methodology/schema.json @@ -76,13 +76,12 @@ "description": "Pattern for filenames of full search space files", "type": "object", "required": [ - "regex", - "regex_variables" + "regex" ], "properties": { "regex": { "type": "string", - "pattern": "(.*\\${gpus}.*\\${applications}.*\\.json)|(.*\\${kernel}.*\\${gpu}.*\\.json)", + "pattern": ".*.json", "examples": [ "${gpus}_${applications}_output.json", "full-search-space-${applications}-${gpus}.json" From 0ee3bb5ede5058709c5bf6ff199c8c05b362c1db Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 10 Oct 2024 11:12:51 -0700 Subject: [PATCH 011/234] Updated to new experiments schema --- .../methodology_paper_evaluation_2.json | 91 +-------- .../methodology_paper_evaluation_new.json | 78 ++++++++ src/autotuning_methodology/experiments.py | 181 +++++++++--------- 3 files changed, 179 insertions(+), 171 deletions(-) create mode 100644 experiment_files/methodology_paper_evaluation_new.json diff --git a/experiment_files/methodology_paper_evaluation_2.json b/experiment_files/methodology_paper_evaluation_2.json index 1850e4c..bc5f0ba 100644 --- a/experiment_files/methodology_paper_evaluation_2.json +++ b/experiment_files/methodology_paper_evaluation_2.json @@ -1,24 +1,27 @@ { "version": "1.0.0", "name": "Methodology paper evaluation", - "parent_folder": "methodology_paper_evaluation", + "parent_folder": "./methodology_paper_evaluation", "experimental_groups_defaults": { "applications": [ { - "name": "coulomb", - "input_file": "/home/janka/KTT/Examples/CoulombSum3d/CoulombSum3dCudaScript.json", - "folder": "/home/janka/KTT/Examples/CoulombSum3d" + "name": "convolution", + "input_file": "../cached_data_used/kernels/convolution", + "folder": "../cached_data_used/visualizations/convolution" }, { - "name": "convolution", - "input_file": 
"/home/janka/KTT/Examples/CoulombSum3d/CoulombSum3dCudaScript.json", - "folder": "/home/janka/KTT/Examples/CoulombSum3d" + "name": "pnpoly", + "input_file": "../cached_data_used/kernels/pnpoly", + "folder": "../cached_data_used/visualizations/pnpoly" } ], "gpus": [ "RTX_3090", "RTX_2080_Ti" ], + "pattern_for_full_search_space_filenames": { + "regex": "../cached_data_used/cachefiles/${applications}/${gpus}.json" + }, "stochastic": true, "repeats": 100, "samples": 32, @@ -72,78 +75,4 @@ "compare_baselines": false, "compare_split_times": false } - - - "kernels_path": "../cached_data_used/kernels", - "bruteforced_caches_path": "../cached_data_used/cachefiles", - "visualization_caches_path": "../cached_data_used/visualizations", - "kernels": [ - "convolution", - "pnpoly" - ], - // "GPUs": [ - // "RTX_3090", - // "RTX_2080_Ti" - // ], - // "minimization": true, - // "resolution": 1e3, - // "cutoff_percentile": 0.96, - // "cutoff_percentile_start": 0.5, - // "cutoff_type": "fevals", - // "objective_time_keys": [ - // "compilation", - // "benchmark", - // "framework", - // "search_algorithm", - // "validation" - // ], - // "objective_performance_keys": [ - // "time" - // ], - // "plot": { - // "plot_x_value_types": [ - // "fevals", - // "time", - // "aggregated" - // ], - // "plot_y_value_types": [ - // "normalized", - // "baseline" - // ], - // "confidence_level": 0.95, - // "compare_baselines": false, - // "compare_split_times": false - // }, - // "strategy_defaults": { - // "iterations": 32, - // "repeats": 100, - // "minimum_number_of_evaluations": 20, - // "stochastic": true, - "record_data": [ - "time", - "GFLOP/s" - ] - // }, - // "strategies": [ - // { - // "name": "genetic_algorithm", - // "strategy": "genetic_algorithm", - // "display_name": "Genetic Algorithm" - // }, - // { - // "name": "dual_annealing", - // "strategy": "dual_annealing", - // "display_name": "Dual Annealing" - // }, - // { - // "name": "greedy_ils", - // "strategy": "greedy_ils", - // 
"display_name": "Greedy ILS" - // }, - // { - // "name": "ktt_profile_searcher", - // "strategy": "profile_searcher", - // "display_name": "KTT Profile Searcher" - // } - // ] } \ No newline at end of file diff --git a/experiment_files/methodology_paper_evaluation_new.json b/experiment_files/methodology_paper_evaluation_new.json new file mode 100644 index 0000000..841c8b5 --- /dev/null +++ b/experiment_files/methodology_paper_evaluation_new.json @@ -0,0 +1,78 @@ +{ + "version": "1.0.0", + "name": "Methodology paper evaluation", + "parent_folder": "./methodology_paper_evaluation", + "experimental_groups_defaults": { + "applications": [ + { + "name": "convolution", + "folder": ".", + "input_file": "./cached_data_used/kernels/convolution.py" + }, + { + "name": "pnpoly", + "folder": "./cached_data_used/visualizations/methodology_paper_evaluation/pnpoly", + "input_file": "./cached_data_used/kernels/pnpoly.py" + } + ], + "gpus": [ + "RTX_3090", + "RTX_2080_Ti" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./cached_data_used/cachefiles/${applications}/${gpus}.json" + }, + "stochastic": true, + "repeats": 100, + "samples": 32, + "minimum_number_of_valid_search_iterations": 20, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "genetic_algorithm", + "search_method": "genetic_algorithm", + "display_name": "Genetic Algorithm", + "autotuner": "KernelTuner" + }, + { + "name": "dual_annealing", + "search_method": "dual_annealing", + "display_name": "Dual Annealing", + "autotuner": "KernelTuner" + }, + { + "name": "greedy_ils", + "search_method": "greedy_ils", + "display_name": "Greedy ILS", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.96, + "cutoff_percentile_start": 0.5, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "time" + ] + }, + "visualization_settings": { + "x_axis_value_types": [ + "fevals", + "time", + 
"aggregated" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index e60ca47..c6ccb18 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -6,17 +6,17 @@ from argparse import ArgumentParser from importlib.resources import files from math import ceil -from os import getcwd -from os import makedirs +from os import getcwd, makedirs from pathlib import Path from jsonschema import validate from autotuning_methodology.caching import ResultsDescription -from autotuning_methodology.runner import collect_results -from autotuning_methodology.runner import convert_KTT_output_to_standard +from autotuning_methodology.runner import collect_results, convert_KTT_output_to_standard from autotuning_methodology.searchspace_statistics import SearchspaceStatistics +PACKAGE_ROOT = Path(__file__).parent.parent.parent + def get_args_from_cli(args=None) -> str: """Set the Command Line Interface arguments definitions, get and return the argument values. @@ -32,15 +32,12 @@ def get_args_from_cli(args=None) -> str: """ cli = ArgumentParser() cli.add_argument( - "experiment", type=str, - help="The experiment setup json file to execute, see experiments/template.json" + "experiment", type=str, help="The experiment setup json file to execute, see experiments/template.json" ) args = cli.parse_args(args) filepath: str = args.experiment if filepath is None or filepath == "": - raise ValueError( - "Invalid '--experiment' option. Run 'visualize_experiments.py -h' to read more." - ) + raise ValueError("Invalid '--experiment' option. 
Run 'visualize_experiments.py -h' to read more.") return filepath @@ -54,23 +51,20 @@ def get_experiment_schema_filepath(): assert schemafile.is_file(), f"Path to schema.json does not exist, attempted path: {schemafile}" return schemafile -def make_and_check_path(filename: str, parent = None, extension = None) -> Path: + +def make_and_check_path(filename: str, parent=None, extension=None) -> Path: filename_path = Path(filename) if filename_path.is_absolute() is False and parent is not None: - filename_path = Path(parent).joinpath(filename).resolve() + filename_path = PACKAGE_ROOT / Path(parent).joinpath(filename).resolve() if filename_path.exists(): return filename_path # try and add extension if extension is None: - raise FileNotFoundError( - f"{filename_path} does not exist." - ) + raise FileNotFoundError(f"{filename_path} does not exist.") filename_path = Path(str(filename_path) + extension) if filename_path.exists(): return filename_path - raise FileNotFoundError( - f"{filename_path} does not exist." 
- ) + raise FileNotFoundError(f"{filename_path} does not exist.") def get_experiment(filename: str) -> dict: @@ -120,39 +114,39 @@ def get_experimental_groups(experiment: dict) -> list[dict]: # set up the directory structure experiment["parent_folder_absolute_path"] = Path(experiment["parent_folder"]).resolve() # if folder "run" does not exist, create - makedirs(experiment["parent_folder_absolute_path"].joinpath("run"), exist_ok = True) - makedirs(experiment["parent_folder_absolute_path"].joinpath("setup"), exist_ok = True) + makedirs(experiment["parent_folder_absolute_path"].joinpath("run"), exist_ok=True) + makedirs(experiment["parent_folder_absolute_path"].joinpath("setup"), exist_ok=True) # create folders for each experimental group from file for strategy in search_strategies: - makedirs(experiment["parent_folder_absolute_path"].joinpath("run").joinpath(strategy["name"]), exist_ok = True) + makedirs(experiment["parent_folder_absolute_path"].joinpath("run").joinpath(strategy["name"]), exist_ok=True) # generate all experimental groups # with applications and gpus provided in experimental_groups_defaults # and search strategies provided in search_strategies all_experimental_groups = generate_all_experimental_groups( - search_strategies, - experimental_groups_defaults, - experiment["parent_folder_absolute_path"] + search_strategies, experimental_groups_defaults, experiment["parent_folder_absolute_path"] ) # additional check beyond validation - # if every experimental group has autotuner set - # set autotuner_path to default installation if not set by the user + # if every experimental group has autotuner set + # set autotuner_path to default installation if not set by the user for group in all_experimental_groups: if group.get("autotuner") is None: raise KeyError( "Property 'autotuner' must be set for all groups, either in experimental_groups_defaults or in experimental_groups. 
It is not set for", - group["full_name"] + group["full_name"], ) if group["autotuner"] == "KTT": if group["samples"] != 1: raise NotImplementedError( - f"KTT currently supports only one sample per run and output. Please set samples=1 for group['full_name']." + "KTT currently supports only one sample per run and output. Please set samples=1 for group['full_name']." ) if group.get("autotuner_path") is None: raise NotImplementedError( - "Default autotuner_path is not supported yet for KTT, please set autotuner_path for ", group["full_name"] + " to directory with KttTuningLauncher and pyktt.so, e.g. /home/user/KTT/Build/x86_64_Release." + "Default autotuner_path is not supported yet for KTT, please set autotuner_path for ", + group["full_name"] + + " to directory with KttTuningLauncher and pyktt.so, e.g. /home/user/KTT/Build/x86_64_Release.", ) elif Path(group["autotuner_path"]).exists() is False: raise FileNotFoundError( @@ -170,10 +164,9 @@ def get_experimental_groups(experiment: dict) -> list[dict]: return all_experimental_groups + def generate_all_experimental_groups( - search_strategies: list[dict], - experimental_groups_defaults: dict, - parent_folder_path: Path + search_strategies: list[dict], experimental_groups_defaults: dict, parent_folder_path: Path ) -> list[dict]: """Generates all experimental groups for the experiment as a combination of given applications, gpus and search strategies from experiments setup file. 
@@ -194,7 +187,9 @@ def generate_all_experimental_groups( for default in experimental_groups_defaults: if default not in group and default not in [ - "applications", "gpus", "pattern_for_full_search_space_filenames" + "applications", + "gpus", + "pattern_for_full_search_space_filenames", ]: group[default] = experimental_groups_defaults[default] @@ -204,10 +199,13 @@ def generate_all_experimental_groups( group["application_name"] = application["name"] group["application_folder"] = Path(application["folder"]) - group["application_input_file"] = make_and_check_path(application["input_file"], application["folder"], None) - group["input_file"] : Path + group["application_input_file"] = make_and_check_path( + application["input_file"], application["folder"], None + ) + group["input_file"]: Path group["input_file"] = parent_folder_path.joinpath("setup").joinpath( - "_".join([group["full_name"], "input.json"])) + "_".join([group["full_name"], "input.json"]) + ) if experimental_groups_defaults.get("pattern_for_full_search_space_filename") is None: group["full_search_space_file"] = get_full_search_space_filename_from_input_file( @@ -217,7 +215,7 @@ def generate_all_experimental_groups( group["full_search_space_file"] = get_full_search_space_filename_from_pattern( experimental_groups_defaults["pattern_for_full_search_space_filenames"], gpu, - application["name"] + application["name"], ) if group["autotuner"] == "KTT": @@ -225,14 +223,16 @@ def generate_all_experimental_groups( # note that full search space file in KTT output format still gets injected to input json, that is because KTT needs to have that file in its own format # the converted file is loaded with this package when calculating search space statistics group["converted_full_search_space_file"] = convert_KTT_to_standard_full_search_space_file( - group["full_search_space_file"], - parent_folder_path.joinpath("setup") + group["full_search_space_file"], parent_folder_path.joinpath("setup") ) - group["output_file"] : 
Path - group["output_file"] = parent_folder_path.joinpath("run").joinpath( - group["name"]).joinpath(group["full_name"] + ".json").resolve() - + group["output_file"]: Path + group["output_file"] = ( + parent_folder_path.joinpath("run") + .joinpath(group["name"]) + .joinpath(group["full_name"] + ".json") + .resolve() + ) generate_input_file(group) experimental_groups.append(group) @@ -252,18 +252,22 @@ def get_full_search_space_filename_from_input_file(input_filename: Path) -> Path Returns: A path to full search space file that was written in the input json file. """ - with open(input_filename, 'r', encoding="utf-8") as input_file: + with open(input_filename, "r", encoding="utf-8") as input_file: input_json = json.load(input_file) if input_json["KernelSpecification"].get("SimulationInput") is None: raise KeyError( - "SimulationInput, i.e. full search space file is expected and not defined in", input_filename, ". Please set the path to that file in KernelSpecification.SimulationInput in input json file or set pattern_for_full_search_space_filename in experiments setup json file.") - full_search_space_filename = make_and_check_path(input_json["KernelSpecification"]["SimulationInput"], str(input_filename.parent), ".json") + "SimulationInput, i.e. full search space file is expected and not defined in", + input_filename, + ". 
Please set the path to that file in KernelSpecification.SimulationInput in input json file or set pattern_for_full_search_space_filename in experiments setup json file.", + ) + full_search_space_filename = make_and_check_path( + input_json["KernelSpecification"]["SimulationInput"], str(input_filename.parent), ".json" + ) # need to return filename WITHOUT .json, KTT (and probably also others) needs that in SimulationInput in input json as other autotuner can take other formats return full_search_space_filename.parent.joinpath(full_search_space_filename.stem) -def get_full_search_space_filename_from_pattern( - pattern: dict, gpu: str, application_name: str -) -> Path: + +def get_full_search_space_filename_from_pattern(pattern: dict, gpu: str, application_name: str) -> Path: """Returns a path to full search space file that is generated from the pattern provided in experiments setup file. Args: @@ -278,15 +282,16 @@ def get_full_search_space_filename_from_pattern( A path to full search file generated from the pattern. """ if pattern["regex_variables"] != ["applications", "gpus"]: - raise NotImplementedError("Other variables than applications and gpus in pattern for full search space filename are not supported yet. Sorry.") + raise NotImplementedError( + "Other variables than applications and gpus in pattern for full search space filename are not supported yet. Sorry." + ) filename = pattern["regex"].replace("${applications}", application_name).replace("${gpus}", gpu) full_search_space_filename = make_and_check_path(filename) return full_search_space_filename -def convert_KTT_to_standard_full_search_space_file( - full_search_space_file: Path, - setup_folder: Path) -> Path: - """ Converts KTT-formatted full search space file to the standard format recognized by this package. 
+ +def convert_KTT_to_standard_full_search_space_file(full_search_space_file: Path, setup_folder: Path) -> Path: + """Converts KTT-formatted full search space file to the standard format recognized by this package. Args: full_search_space_file: the path to KTT-formatted full search space file @@ -298,15 +303,13 @@ def convert_KTT_to_standard_full_search_space_file( converted_output = convert_KTT_output_to_standard(full_search_space_file.with_suffix(".json")) converted_filename = setup_folder.joinpath(full_search_space_file.stem + "_converted.json") - with open(converted_filename, "w", encoding = "utf-8") as converted_file: + with open(converted_filename, "w", encoding="utf-8") as converted_file: json.dump(converted_output, converted_file, indent=4) return converted_filename -def calculate_budget( - group: dict, statistics_settings: dict, searchspace_stats: SearchspaceStatistics -) -> dict: +def calculate_budget(group: dict, statistics_settings: dict, searchspace_stats: SearchspaceStatistics) -> dict: """Calculates the budget for the experimental group, given cutoff point provided in experiments setup file. 
Args: @@ -330,9 +333,7 @@ def calculate_budget( if statistics_settings["cutoff_type"] == "time": group["budget"]["time_limit"] = cutoff_point_time * cutoff_margin else: - group["budget"]["max_fevals"] = min( - int(ceil(cutoff_point_fevals * cutoff_margin)), searchspace_stats.size - ) + group["budget"]["max_fevals"] = min(int(ceil(cutoff_point_fevals * cutoff_margin)), searchspace_stats.size) # write to group's input file as Budget with open(group["input_file"], "r", encoding="utf-8") as fp: @@ -343,7 +344,7 @@ def calculate_budget( if group["budget"].get("time_limit") is not None: input_json["Budget"][0]["Type"] = "TuningDuration" input_json["Budget"][0]["BudgetValue"] = group["budget"]["time_limit"] - else: #it's max_fevals + else: # it's max_fevals input_json["Budget"][0]["Type"] = "ConfigurationCount" input_json["Budget"][0]["BudgetValue"] = group["budget"]["max_fevals"] @@ -352,6 +353,7 @@ def calculate_budget( return group + def generate_input_file(group: dict): """Creates a input json file specific for a given application, gpu and search method. @@ -361,11 +363,11 @@ def generate_input_file(group: dict): with open(group["application_input_file"], "r", encoding="utf-8") as fp: input_json = json.load(fp) input_json["KernelSpecification"]["SimulationInput"] = str(group["full_search_space_file"]) - input_json["General"]["OutputFile"] = str( - group["output_file"].parent.joinpath(group["output_file"].stem) - ) + input_json["General"]["OutputFile"] = str(group["output_file"].parent.joinpath(group["output_file"].stem)) if input_json["General"]["OutputFormat"] != "JSON": - raise RuntimeError(f"Only JSON output format is supported. Please set General.OutputFormat to JSON in {group['application_input_file']}.") + raise RuntimeError( + f"Only JSON output format is supported. Please set General.OutputFormat to JSON in {group['application_input_file']}." 
+ ) if input_json["KernelSpecification"].get("Device") is None: input_json["KernelSpecification"]["Device"] = {} input_json["KernelSpecification"]["Device"]["Name"] = group["gpu"] @@ -385,6 +387,7 @@ def generate_input_file(group: dict): with open(group["input_file"], "w", encoding="utf-8") as fp: json.dump(input_json, fp, indent=4) + def execute_experiment(filepath: str, profiling: bool = False) -> tuple[dict, dict, dict]: """Executes the experiment by retrieving it from the cache or running it. @@ -413,7 +416,9 @@ def execute_experiment(filepath: str, profiling: bool = False) -> tuple[dict, di # open the experiment file and validate using the schema file with open(schemafile, "r", encoding="utf-8") as schemafile: schema = json.load(schemafile) - objective_time_keys = schema["properties"]["statistics_settings"]["properties"]["objective_time_keys"]["items"]["enum"] + objective_time_keys = schema["properties"]["statistics_settings"]["properties"]["objective_time_keys"][ + "items" + ]["enum"] objective_time_keys.remove("all") experiment["statistics_settings"]["objective_time_keys"] = objective_time_keys @@ -432,9 +437,7 @@ def execute_experiment(filepath: str, profiling: bool = False) -> tuple[dict, di for application in experiment["experimental_groups_defaults"]["applications_names"]: results_descriptions[gpu][application] = {} - - -# just iterate over experimental_groups, collect results and write to proper place + # just iterate over experimental_groups, collect results and write to proper place for group in all_experimental_groups: print(f" | - running on GPU '{group['gpu']}'") @@ -450,34 +453,31 @@ def execute_experiment(filepath: str, profiling: bool = False) -> tuple[dict, di full_search_space_file_path = group["converted_full_search_space_file"] searchspace_statistics[group["gpu"]][group["application_name"]] = SearchspaceStatistics( - application_name = group["application_name"], - device_name = group["gpu"], - minimization = 
experiment["statistics_settings"]["minimization"], - objective_time_keys = objective_time_keys, - objective_performance_keys = experiment["statistics_settings"]["objective_performance_keys"], - full_search_space_file_path = full_search_space_file_path, - ) + application_name=group["application_name"], + device_name=group["gpu"], + minimization=experiment["statistics_settings"]["minimization"], + objective_time_keys=objective_time_keys, + objective_performance_keys=experiment["statistics_settings"]["objective_performance_keys"], + full_search_space_file_path=full_search_space_file_path, + ) # calculation of budget can be done only now, after searchspace statistics have been initialized group = calculate_budget( - group, - experiment["statistics_settings"], - searchspace_statistics[group["gpu"]][group["application_name"]] + group, experiment["statistics_settings"], searchspace_statistics[group["gpu"]][group["application_name"]] ) results_description = ResultsDescription( - run_folder = experiment_folderpath/ "run" / group["name"], - application_name = group["application_name"], - device_name = group["gpu"], - group_name = group["name"], - group_display_name = group["display_name"], - stochastic = group["stochastic"], - objective_time_keys = objective_time_keys, - objective_performance_keys = experiment["statistics_settings"]["objective_performance_keys"], - minimization = experiment["statistics_settings"]["minimization"], + run_folder=experiment_folderpath / "run" / group["name"], + application_name=group["application_name"], + device_name=group["gpu"], + group_name=group["name"], + group_display_name=group["display_name"], + stochastic=group["stochastic"], + objective_time_keys=objective_time_keys, + objective_performance_keys=experiment["statistics_settings"]["objective_performance_keys"], + minimization=experiment["statistics_settings"]["minimization"], ) - # if the strategy is in the cache, use cached data if ("ignore_cache" not in group or group["ignore_cache"] 
is False) and results_description.has_results(): print(" | - | - | -> retrieved from cache") @@ -487,7 +487,7 @@ def execute_experiment(filepath: str, profiling: bool = False) -> tuple[dict, di group, results_description, searchspace_statistics[group["gpu"]][group["application_name"]], - profiling=profiling + profiling=profiling, ) # set the results @@ -495,6 +495,7 @@ def execute_experiment(filepath: str, profiling: bool = False) -> tuple[dict, di return experiment, all_experimental_groups, searchspace_statistics, results_descriptions + def entry_point(): # pragma: no cover """Entry point function for Experiments.""" experiment_filepath = get_args_from_cli() From 90c379041c6be230c87d9b2b29a61ac07e47aa56 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 10 Oct 2024 11:22:17 -0700 Subject: [PATCH 012/234] Ensure a usable error is given in the case of an incorrect experiments file --- src/autotuning_methodology/experiments.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index c6ccb18..494f389 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -9,7 +9,7 @@ from os import getcwd, makedirs from pathlib import Path -from jsonschema import validate +from jsonschema import ValidationError, validate from autotuning_methodology.caching import ResultsDescription from autotuning_methodology.runner import collect_results, convert_KTT_output_to_standard @@ -95,8 +95,12 @@ def get_experiment(filename: str) -> dict: with open(path, "r", encoding="utf-8") as file, open(schemafile, "r", encoding="utf-8") as schemafile: schema = json.load(schemafile) experiment: dict = json.load(file) - validate(instance=experiment, schema=schema) - return experiment + try: + validate(instance=experiment, schema=schema) + return experiment + except ValidationError as e: + print(e) + raise ValidationError("Experiment 
file does not comply with schema") def get_experimental_groups(experiment: dict) -> list[dict]: From e6edebc41782b12d66285110d071cd3ffb74f58c Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 10 Oct 2024 12:22:36 -0700 Subject: [PATCH 013/234] Fixed an issue where cachefile patterns weren't recognized, improved error handling --- .../methodology_paper_evaluation_new.json | 4 +- ...3090_convolution_dual_annealing_input.json | 154 ++++++++++++++++++ ...0_convolution_genetic_algorithm_input.json | 154 ++++++++++++++++++ ...RTX_3090_convolution_greedy_ils_input.json | 154 ++++++++++++++++++ src/autotuning_methodology/experiments.py | 8 +- 5 files changed, 468 insertions(+), 6 deletions(-) create mode 100644 methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json create mode 100644 methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json create mode 100644 methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json diff --git a/experiment_files/methodology_paper_evaluation_new.json b/experiment_files/methodology_paper_evaluation_new.json index 841c8b5..64a6dde 100644 --- a/experiment_files/methodology_paper_evaluation_new.json +++ b/experiment_files/methodology_paper_evaluation_new.json @@ -7,12 +7,12 @@ { "name": "convolution", "folder": ".", - "input_file": "./cached_data_used/kernels/convolution.py" + "input_file": "./cached_data_used/kernels/convolution.json" }, { "name": "pnpoly", "folder": "./cached_data_used/visualizations/methodology_paper_evaluation/pnpoly", - "input_file": "./cached_data_used/kernels/pnpoly.py" + "input_file": "./cached_data_used/kernels/pnpoly.json" } ], "gpus": [ diff --git a/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json b/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json new file mode 100644 index 0000000..53a496b --- /dev/null +++ 
b/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json @@ -0,0 +1,154 @@ +{ + "General": { + "BenchmarkName": "convolution", + "OutputFormat": "JSON", + "OutputFile": "/Users/fjwillemsen/University/PhD/OneDrive_Netherlands_eScience_Center/Projects/Bayesian Optimization in Kernel Tuner/Code/autotuning_methodology/methodology_paper_evaluation/run/dual_annealing/RTX_3090_convolution_dual_annealing" + }, + "ConfigurationSpace": { + "TuningParameters": [ + { + "Name": "filter_width", + "Type": "int", + "Values": "[15]", + "Default": 15 + }, + { + "Name": "filter_height", + "Type": "int", + "Values": "[15]", + "Default": 15 + }, + { + "Name": "block_size_x", + "Type": "int", + "Values": "[1, 2, 4, 8, 16, 32, 48, 64, 96, 112, 128]", + "Default": 16 + }, + { + "Name": "block_size_y", + "Type": "int", + "Values": "[1, 2, 4, 8, 16, 32]", + "Default": 16 + }, + { + "Name": "tile_size_x", + "Type": "int", + "Values": "[1, 2, 3, 4, 5, 6, 7, 8]", + "Default": 1 + }, + { + "Name": "tile_size_y", + "Type": "int", + "Values": "[1, 2, 3, 4, 5, 6, 7, 8]", + "Default": 1 + }, + { + "Name": "use_padding", + "Type": "int", + "Values": "[0, 1]", + "Default": 1 + }, + { + "Name": "read_only", + "Type": "int", + "Values": "[0, 1]", + "Default": 0 + } + ], + "Conditions": [ + { + "Expression": "block_size_x*block_size_y>=64", + "Parameters": [ + "block_size_x", + "block_size_y" + ] + }, + { + "Expression": "block_size_x*block_size_y>=1024", + "Parameters": [ + "block_size_x", + "block_size_y" + ] + }, + { + "Expression": "tile_size_x*tile_size_y<30", + "Parameters": [ + "tile_size_x", + "tile_size_y" + ] + } + ] + }, + "KernelSpecification": { + "Language": "CUDA", + "CompilerOptions": [ + "-std=c++11" + ], + "BenchmarkName": "GEMM", + "KernelName": "convolution_kernel", + "KernelFile": "convolution.cu", + "GlobalSizeType": "CUDA", + "LocalSize": { + "X": "block_size_x", + "Y": "block_size_y", + "Z": "1" + }, + "GlobalSize": { + "X": "(262144 // block_size_x) // 
tile_size_x", + "Y": "(262144 // block_size_y) // tile_size_y", + "Z": "1" + }, + "GridDivX": [ + "block_size_x", + "tile_size_x" + ], + "GridDivY": [ + "block_size_y", + "tile_size_y" + ], + "ProblemSize": [ + 4096, + 4096 + ], + "SharedMemory": 0, + "Stream": null, + "Arguments": [ + { + "Name": "output_image", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "WriteOnly", + "FillType": "Constant", + "Size": "ProblemSize[0]*ProblemSize[1]", + "FillValue": 0.0, + "Output": 1 + }, + { + "Name": "input_image", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "ReadOnly", + "FillType": "Random", + "Size": "(ProblemSize[0]+max(filter_width)-1) * (ProblemSize[1]+max(filter_height)-1)", + "FillValue": 1.0 + }, + { + "Name": "d_filter", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "ReadOnly", + "MemType": "Constant", + "FillType": "Random", + "Size": "max(filter_height) * max(filter_width)", + "FillValue": 1.0 + } + ], + "SimulationInput": "cached_data_used/cachefiles/convolution/RTX_3090.json", + "Device": { + "Name": "RTX_3090" + } + }, + "Search": { + "Name": "dual_annealing" + } +} \ No newline at end of file diff --git a/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json b/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json new file mode 100644 index 0000000..4c99495 --- /dev/null +++ b/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json @@ -0,0 +1,154 @@ +{ + "General": { + "BenchmarkName": "convolution", + "OutputFormat": "JSON", + "OutputFile": "/Users/fjwillemsen/University/PhD/OneDrive_Netherlands_eScience_Center/Projects/Bayesian Optimization in Kernel Tuner/Code/autotuning_methodology/methodology_paper_evaluation/run/genetic_algorithm/RTX_3090_convolution_genetic_algorithm" + }, + "ConfigurationSpace": { + "TuningParameters": [ + { + "Name": "filter_width", + "Type": "int", + "Values": "[15]", + "Default": 15 + }, + { + 
"Name": "filter_height", + "Type": "int", + "Values": "[15]", + "Default": 15 + }, + { + "Name": "block_size_x", + "Type": "int", + "Values": "[1, 2, 4, 8, 16, 32, 48, 64, 96, 112, 128]", + "Default": 16 + }, + { + "Name": "block_size_y", + "Type": "int", + "Values": "[1, 2, 4, 8, 16, 32]", + "Default": 16 + }, + { + "Name": "tile_size_x", + "Type": "int", + "Values": "[1, 2, 3, 4, 5, 6, 7, 8]", + "Default": 1 + }, + { + "Name": "tile_size_y", + "Type": "int", + "Values": "[1, 2, 3, 4, 5, 6, 7, 8]", + "Default": 1 + }, + { + "Name": "use_padding", + "Type": "int", + "Values": "[0, 1]", + "Default": 1 + }, + { + "Name": "read_only", + "Type": "int", + "Values": "[0, 1]", + "Default": 0 + } + ], + "Conditions": [ + { + "Expression": "block_size_x*block_size_y>=64", + "Parameters": [ + "block_size_x", + "block_size_y" + ] + }, + { + "Expression": "block_size_x*block_size_y>=1024", + "Parameters": [ + "block_size_x", + "block_size_y" + ] + }, + { + "Expression": "tile_size_x*tile_size_y<30", + "Parameters": [ + "tile_size_x", + "tile_size_y" + ] + } + ] + }, + "KernelSpecification": { + "Language": "CUDA", + "CompilerOptions": [ + "-std=c++11" + ], + "BenchmarkName": "GEMM", + "KernelName": "convolution_kernel", + "KernelFile": "convolution.cu", + "GlobalSizeType": "CUDA", + "LocalSize": { + "X": "block_size_x", + "Y": "block_size_y", + "Z": "1" + }, + "GlobalSize": { + "X": "(262144 // block_size_x) // tile_size_x", + "Y": "(262144 // block_size_y) // tile_size_y", + "Z": "1" + }, + "GridDivX": [ + "block_size_x", + "tile_size_x" + ], + "GridDivY": [ + "block_size_y", + "tile_size_y" + ], + "ProblemSize": [ + 4096, + 4096 + ], + "SharedMemory": 0, + "Stream": null, + "Arguments": [ + { + "Name": "output_image", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "WriteOnly", + "FillType": "Constant", + "Size": "ProblemSize[0]*ProblemSize[1]", + "FillValue": 0.0, + "Output": 1 + }, + { + "Name": "input_image", + "Type": "float", + "MemoryType": "Vector", + 
"AccessType": "ReadOnly", + "FillType": "Random", + "Size": "(ProblemSize[0]+max(filter_width)-1) * (ProblemSize[1]+max(filter_height)-1)", + "FillValue": 1.0 + }, + { + "Name": "d_filter", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "ReadOnly", + "MemType": "Constant", + "FillType": "Random", + "Size": "max(filter_height) * max(filter_width)", + "FillValue": 1.0 + } + ], + "SimulationInput": "cached_data_used/cachefiles/convolution/RTX_3090.json", + "Device": { + "Name": "RTX_3090" + } + }, + "Search": { + "Name": "genetic_algorithm" + } +} \ No newline at end of file diff --git a/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json b/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json new file mode 100644 index 0000000..9677598 --- /dev/null +++ b/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json @@ -0,0 +1,154 @@ +{ + "General": { + "BenchmarkName": "convolution", + "OutputFormat": "JSON", + "OutputFile": "/Users/fjwillemsen/University/PhD/OneDrive_Netherlands_eScience_Center/Projects/Bayesian Optimization in Kernel Tuner/Code/autotuning_methodology/methodology_paper_evaluation/run/greedy_ils/RTX_3090_convolution_greedy_ils" + }, + "ConfigurationSpace": { + "TuningParameters": [ + { + "Name": "filter_width", + "Type": "int", + "Values": "[15]", + "Default": 15 + }, + { + "Name": "filter_height", + "Type": "int", + "Values": "[15]", + "Default": 15 + }, + { + "Name": "block_size_x", + "Type": "int", + "Values": "[1, 2, 4, 8, 16, 32, 48, 64, 96, 112, 128]", + "Default": 16 + }, + { + "Name": "block_size_y", + "Type": "int", + "Values": "[1, 2, 4, 8, 16, 32]", + "Default": 16 + }, + { + "Name": "tile_size_x", + "Type": "int", + "Values": "[1, 2, 3, 4, 5, 6, 7, 8]", + "Default": 1 + }, + { + "Name": "tile_size_y", + "Type": "int", + "Values": "[1, 2, 3, 4, 5, 6, 7, 8]", + "Default": 1 + }, + { + "Name": "use_padding", + "Type": "int", + "Values": "[0, 1]", + "Default": 1 
+ }, + { + "Name": "read_only", + "Type": "int", + "Values": "[0, 1]", + "Default": 0 + } + ], + "Conditions": [ + { + "Expression": "block_size_x*block_size_y>=64", + "Parameters": [ + "block_size_x", + "block_size_y" + ] + }, + { + "Expression": "block_size_x*block_size_y>=1024", + "Parameters": [ + "block_size_x", + "block_size_y" + ] + }, + { + "Expression": "tile_size_x*tile_size_y<30", + "Parameters": [ + "tile_size_x", + "tile_size_y" + ] + } + ] + }, + "KernelSpecification": { + "Language": "CUDA", + "CompilerOptions": [ + "-std=c++11" + ], + "BenchmarkName": "GEMM", + "KernelName": "convolution_kernel", + "KernelFile": "convolution.cu", + "GlobalSizeType": "CUDA", + "LocalSize": { + "X": "block_size_x", + "Y": "block_size_y", + "Z": "1" + }, + "GlobalSize": { + "X": "(262144 // block_size_x) // tile_size_x", + "Y": "(262144 // block_size_y) // tile_size_y", + "Z": "1" + }, + "GridDivX": [ + "block_size_x", + "tile_size_x" + ], + "GridDivY": [ + "block_size_y", + "tile_size_y" + ], + "ProblemSize": [ + 4096, + 4096 + ], + "SharedMemory": 0, + "Stream": null, + "Arguments": [ + { + "Name": "output_image", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "WriteOnly", + "FillType": "Constant", + "Size": "ProblemSize[0]*ProblemSize[1]", + "FillValue": 0.0, + "Output": 1 + }, + { + "Name": "input_image", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "ReadOnly", + "FillType": "Random", + "Size": "(ProblemSize[0]+max(filter_width)-1) * (ProblemSize[1]+max(filter_height)-1)", + "FillValue": 1.0 + }, + { + "Name": "d_filter", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "ReadOnly", + "MemType": "Constant", + "FillType": "Random", + "Size": "max(filter_height) * max(filter_width)", + "FillValue": 1.0 + } + ], + "SimulationInput": "cached_data_used/cachefiles/convolution/RTX_3090.json", + "Device": { + "Name": "RTX_3090" + } + }, + "Search": { + "Name": "greedy_ils" + } +} \ No newline at end of file diff --git 
a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 494f389..65f7520 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -211,7 +211,7 @@ def generate_all_experimental_groups( "_".join([group["full_name"], "input.json"]) ) - if experimental_groups_defaults.get("pattern_for_full_search_space_filename") is None: + if experimental_groups_defaults.get("pattern_for_full_search_space_filenames") is None: group["full_search_space_file"] = get_full_search_space_filename_from_input_file( group["application_input_file"] ) @@ -285,11 +285,11 @@ def get_full_search_space_filename_from_pattern(pattern: dict, gpu: str, applica Returns: A path to full search file generated from the pattern. """ - if pattern["regex_variables"] != ["applications", "gpus"]: + filename = pattern["regex"].replace("${applications}", application_name).replace("${gpus}", gpu) + if "${" in filename: raise NotImplementedError( - "Other variables than applications and gpus in pattern for full search space filename are not supported yet. Sorry." + f"Variables other than applications and gpus are not yet supported for pattern matching. Unresolved: {filename}." 
) - filename = pattern["regex"].replace("${applications}", application_name).replace("${gpus}", gpu) full_search_space_filename = make_and_check_path(filename) return full_search_space_filename From 9e0ae031f3785a7309c8aee49a387077c72ebe9c Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 10 Oct 2024 16:15:51 -0700 Subject: [PATCH 014/234] Several minor improvements --- .gitignore | 3 +++ .../methodology_paper_evaluation_new.json | 4 ++-- ...3090_convolution_dual_annealing_input.json | 2 +- ...0_convolution_genetic_algorithm_input.json | 2 +- ...RTX_3090_convolution_greedy_ils_input.json | 2 +- src/autotuning_methodology/experiments.py | 1 - .../searchspace_statistics.py | 22 ++++++++++--------- 7 files changed, 20 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index f15a257..3f3f846 100755 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,9 @@ cached_data_used/visualizations/* cached_data_used/last_run/* cached_data_used/import_runs/* +# ignore setup files +*/setup/*_input.json + # exceptions !cached_data_used/cachefiles/ktt_values_to_kerneltuner.py diff --git a/experiment_files/methodology_paper_evaluation_new.json b/experiment_files/methodology_paper_evaluation_new.json index 64a6dde..1b9e25b 100644 --- a/experiment_files/methodology_paper_evaluation_new.json +++ b/experiment_files/methodology_paper_evaluation_new.json @@ -11,7 +11,7 @@ }, { "name": "pnpoly", - "folder": "./cached_data_used/visualizations/methodology_paper_evaluation/pnpoly", + "folder": ".", "input_file": "./cached_data_used/kernels/pnpoly.json" } ], @@ -20,7 +20,7 @@ "RTX_2080_Ti" ], "pattern_for_full_search_space_filenames": { - "regex": "./cached_data_used/cachefiles/${applications}/${gpus}.json" + "regex": "./cached_data_used/cachefiles/${applications}/${gpus}_T4.json" }, "stochastic": true, "repeats": 100, diff --git a/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json 
b/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json index 53a496b..b48845c 100644 --- a/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json +++ b/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json @@ -143,7 +143,7 @@ "FillValue": 1.0 } ], - "SimulationInput": "cached_data_used/cachefiles/convolution/RTX_3090.json", + "SimulationInput": "cached_data_used/cachefiles/convolution/RTX_3090_T4.json", "Device": { "Name": "RTX_3090" } diff --git a/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json b/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json index 4c99495..e2327c7 100644 --- a/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json +++ b/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json @@ -143,7 +143,7 @@ "FillValue": 1.0 } ], - "SimulationInput": "cached_data_used/cachefiles/convolution/RTX_3090.json", + "SimulationInput": "cached_data_used/cachefiles/convolution/RTX_3090_T4.json", "Device": { "Name": "RTX_3090" } diff --git a/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json b/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json index 9677598..b43c792 100644 --- a/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json +++ b/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json @@ -143,7 +143,7 @@ "FillValue": 1.0 } ], - "SimulationInput": "cached_data_used/cachefiles/convolution/RTX_3090.json", + "SimulationInput": "cached_data_used/cachefiles/convolution/RTX_3090_T4.json", "Device": { "Name": "RTX_3090" } diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 65f7520..77465cd 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -206,7 +206,6 
@@ def generate_all_experimental_groups( group["application_input_file"] = make_and_check_path( application["input_file"], application["folder"], None ) - group["input_file"]: Path group["input_file"] = parent_folder_path.joinpath("setup").joinpath( "_".join([group["full_name"], "input.json"]) ) diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index 0cb6fcf..d2203c7 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -5,8 +5,8 @@ import json from math import ceil, floor from pathlib import Path -import matplotlib.pyplot as plt +import matplotlib.pyplot as plt import numpy as np from autotuning_methodology.validators import is_invalid_objective_performance, is_invalid_objective_time @@ -262,11 +262,13 @@ def _to_valid_array(self, results: list[dict], key: str, performance: bool) -> n values.append(m["value"]) else: values.append(np.nan) - else : + else: values = list( - v["times"][key] - if key in v["times"] and self._is_not_invalid_value(v["times"][key], performance) - else np.nan + ( + v["times"][key] + if key in v["times"] and self._is_not_invalid_value(v["times"][key], performance) + else np.nan + ) for v in results ) # TODO other that time, performance such as power usage are in results["measurements"]. or not? 
@@ -311,9 +313,9 @@ def _load(self) -> bool: self.objective_times = dict() for key in self.objective_time_keys: self.objective_times[key] = self._to_valid_array(results, key, performance=False) - #self.objective_times[key] = ( + # self.objective_times[key] = ( # self.objective_times[key] / 1000 - #) # TODO Kernel Tuner specific miliseconds to seconds conversion + # ) # TODO Kernel Tuner specific miliseconds to seconds conversion # in runner.convert_KTT_output_to_standard all times get converted to ms assert ( self.objective_times[key].ndim == 1 @@ -344,10 +346,10 @@ def _load(self) -> bool: # get the number of repeats # TODO is this necessary? number of repeats is given in experiments setup file - #valid_cache_index: int = 0 - #while "times" not in cache_values[valid_cache_index]: + # valid_cache_index: int = 0 + # while "times" not in cache_values[valid_cache_index]: # valid_cache_index += 1 - #self.repeats = len(cache_values[valid_cache_index]["times"]) + # self.repeats = len(cache_values[valid_cache_index]["times"]) # combine the arrays to the shape [len(objective_keys), self.size] self.objective_times_array = np.array(list(self.objective_times[key] for key in self.objective_time_keys)) From 4b5f376bec3456cd9249872dc9af2b3722f47913 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 10 Oct 2024 20:28:47 -0700 Subject: [PATCH 015/234] Generated T1 input files now have absolute kernel paths --- .../methodology_paper_evaluation_new.json | 8 ++++---- .../RTX_3090_convolution_dual_annealing_input.json | 2 +- .../RTX_3090_convolution_genetic_algorithm_input.json | 2 +- .../setup/RTX_3090_convolution_greedy_ils_input.json | 2 +- src/autotuning_methodology/experiments.py | 9 +++++++-- src/autotuning_methodology/runner.py | 11 ++++++++++- 6 files changed, 24 insertions(+), 10 deletions(-) diff --git a/experiment_files/methodology_paper_evaluation_new.json b/experiment_files/methodology_paper_evaluation_new.json index 1b9e25b..252c82e 100644 --- 
a/experiment_files/methodology_paper_evaluation_new.json +++ b/experiment_files/methodology_paper_evaluation_new.json @@ -6,13 +6,13 @@ "applications": [ { "name": "convolution", - "folder": ".", - "input_file": "./cached_data_used/kernels/convolution.json" + "folder": "./cached_data_used/kernels", + "input_file": "convolution.json" }, { "name": "pnpoly", - "folder": ".", - "input_file": "./cached_data_used/kernels/pnpoly.json" + "folder": "./cached_data_used/kernels", + "input_file": "pnpoly.json" } ], "gpus": [ diff --git a/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json b/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json index b48845c..308d5d1 100644 --- a/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json +++ b/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json @@ -86,7 +86,7 @@ ], "BenchmarkName": "GEMM", "KernelName": "convolution_kernel", - "KernelFile": "convolution.cu", + "KernelFile": "/Users/fjwillemsen/University/PhD/OneDrive_Netherlands_eScience_Center/Projects/Bayesian Optimization in Kernel Tuner/Code/autotuning_methodology/cached_data_used/kernels/convolution.cu", "GlobalSizeType": "CUDA", "LocalSize": { "X": "block_size_x", diff --git a/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json b/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json index e2327c7..9c1d7bd 100644 --- a/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json +++ b/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json @@ -86,7 +86,7 @@ ], "BenchmarkName": "GEMM", "KernelName": "convolution_kernel", - "KernelFile": "convolution.cu", + "KernelFile": "/Users/fjwillemsen/University/PhD/OneDrive_Netherlands_eScience_Center/Projects/Bayesian Optimization in Kernel 
Tuner/Code/autotuning_methodology/cached_data_used/kernels/convolution.cu", "GlobalSizeType": "CUDA", "LocalSize": { "X": "block_size_x", diff --git a/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json b/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json index b43c792..b8137ea 100644 --- a/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json +++ b/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json @@ -86,7 +86,7 @@ ], "BenchmarkName": "GEMM", "KernelName": "convolution_kernel", - "KernelFile": "convolution.cu", + "KernelFile": "/Users/fjwillemsen/University/PhD/OneDrive_Netherlands_eScience_Center/Projects/Bayesian Optimization in Kernel Tuner/Code/autotuning_methodology/cached_data_used/kernels/convolution.cu", "GlobalSizeType": "CUDA", "LocalSize": { "X": "block_size_x", diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 77465cd..49f2cde 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -209,6 +209,7 @@ def generate_all_experimental_groups( group["input_file"] = parent_folder_path.joinpath("setup").joinpath( "_".join([group["full_name"], "input.json"]) ) + group["parent_folder_path"] = parent_folder_path if experimental_groups_defaults.get("pattern_for_full_search_space_filenames") is None: group["full_search_space_file"] = get_full_search_space_filename_from_input_file( @@ -229,8 +230,7 @@ def generate_all_experimental_groups( group["full_search_space_file"], parent_folder_path.joinpath("setup") ) - group["output_file"]: Path - group["output_file"] = ( + group["output_file"]: Path = ( parent_folder_path.joinpath("run") .joinpath(group["name"]) .joinpath(group["full_name"] + ".json") @@ -376,6 +376,11 @@ def generate_input_file(group: dict): input_json["KernelSpecification"]["Device"]["Name"] = group["gpu"] else: 
input_json["KernelSpecification"]["Device"]["Name"] = group["gpu"] + input_json["KernelSpecification"]["KernelFile"] = str( + Path( + Path(group["application_input_file"]).parent / Path(input_json["KernelSpecification"]["KernelFile"]) + ).resolve() + ) input_json["Search"] = {} input_json["Search"]["Name"] = group["search_method"] diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index c843b65..fd9cd0f 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -199,8 +199,11 @@ def tune( A tuple of the metadata, the results, and the total runtime in miliseconds. """ - def tune_with_kerneltuner(): + def tune_with_kerneltuner_old(): """Interface with kernel tuner to tune the kernel and return the results.""" + kernel = input_file + strategy = group + # get the path to the directory the kernel is in; can't use importlib.resources.files because its not a package kernel_directory = Path(getfile(kernel)).parent assert kernel_directory.is_dir() @@ -240,6 +243,12 @@ def tune_with_kerneltuner(): raise ValueError("Less than two configurations were returned") return metadata, results + def tune_with_kerneltuner(): + """Interface with Kernel Tuner to tune the kernel and return the results.""" + from kernel_tuner import tune_kernel_T1 + + tune_kernel_T1(input_file) + def tune_with_BAT(): """Interface to tune with the BAT benchmarking suite.""" # TODO integrate with BAT From 05e2e50984a9c9db7527e75f3eeeb1e038c39e80 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 10 Oct 2024 22:02:52 -0700 Subject: [PATCH 016/234] Improved warnings, fixed order of tunable parameters --- ...3090_convolution_dual_annealing_input.json | 32 +++++++++---------- ...0_convolution_genetic_algorithm_input.json | 32 +++++++++---------- ...RTX_3090_convolution_greedy_ils_input.json | 32 +++++++++---------- src/autotuning_methodology/experiments.py | 6 ++++ src/autotuning_methodology/runner.py | 26 ++++++++++++--- 5 
files changed, 76 insertions(+), 52 deletions(-) diff --git a/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json b/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json index 308d5d1..681660f 100644 --- a/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json +++ b/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json @@ -7,10 +7,16 @@ "ConfigurationSpace": { "TuningParameters": [ { - "Name": "filter_width", + "Name": "block_size_x", "Type": "int", - "Values": "[15]", - "Default": 15 + "Values": "[1, 2, 4, 8, 16, 32, 48, 64, 96, 112, 128]", + "Default": 16 + }, + { + "Name": "block_size_y", + "Type": "int", + "Values": "[1, 2, 4, 8, 16, 32]", + "Default": 16 }, { "Name": "filter_height", @@ -19,16 +25,16 @@ "Default": 15 }, { - "Name": "block_size_x", + "Name": "filter_width", "Type": "int", - "Values": "[1, 2, 4, 8, 16, 32, 48, 64, 96, 112, 128]", - "Default": 16 + "Values": "[15]", + "Default": 15 }, { - "Name": "block_size_y", + "Name": "read_only", "Type": "int", - "Values": "[1, 2, 4, 8, 16, 32]", - "Default": 16 + "Values": "[0, 1]", + "Default": 0 }, { "Name": "tile_size_x", @@ -47,12 +53,6 @@ "Type": "int", "Values": "[0, 1]", "Default": 1 - }, - { - "Name": "read_only", - "Type": "int", - "Values": "[0, 1]", - "Default": 0 } ], "Conditions": [ @@ -143,7 +143,7 @@ "FillValue": 1.0 } ], - "SimulationInput": "cached_data_used/cachefiles/convolution/RTX_3090_T4.json", + "SimulationInput": "cached_data_used/cachefiles/convolution/RTX_3090.json", "Device": { "Name": "RTX_3090" } diff --git a/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json b/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json index 9c1d7bd..69b34eb 100644 --- a/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json +++ 
b/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json @@ -7,10 +7,16 @@ "ConfigurationSpace": { "TuningParameters": [ { - "Name": "filter_width", + "Name": "block_size_x", "Type": "int", - "Values": "[15]", - "Default": 15 + "Values": "[1, 2, 4, 8, 16, 32, 48, 64, 96, 112, 128]", + "Default": 16 + }, + { + "Name": "block_size_y", + "Type": "int", + "Values": "[1, 2, 4, 8, 16, 32]", + "Default": 16 }, { "Name": "filter_height", @@ -19,16 +25,16 @@ "Default": 15 }, { - "Name": "block_size_x", + "Name": "filter_width", "Type": "int", - "Values": "[1, 2, 4, 8, 16, 32, 48, 64, 96, 112, 128]", - "Default": 16 + "Values": "[15]", + "Default": 15 }, { - "Name": "block_size_y", + "Name": "read_only", "Type": "int", - "Values": "[1, 2, 4, 8, 16, 32]", - "Default": 16 + "Values": "[0, 1]", + "Default": 0 }, { "Name": "tile_size_x", @@ -47,12 +53,6 @@ "Type": "int", "Values": "[0, 1]", "Default": 1 - }, - { - "Name": "read_only", - "Type": "int", - "Values": "[0, 1]", - "Default": 0 } ], "Conditions": [ @@ -143,7 +143,7 @@ "FillValue": 1.0 } ], - "SimulationInput": "cached_data_used/cachefiles/convolution/RTX_3090_T4.json", + "SimulationInput": "cached_data_used/cachefiles/convolution/RTX_3090.json", "Device": { "Name": "RTX_3090" } diff --git a/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json b/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json index b8137ea..90cf636 100644 --- a/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json +++ b/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json @@ -7,10 +7,16 @@ "ConfigurationSpace": { "TuningParameters": [ { - "Name": "filter_width", + "Name": "block_size_x", "Type": "int", - "Values": "[15]", - "Default": 15 + "Values": "[1, 2, 4, 8, 16, 32, 48, 64, 96, 112, 128]", + "Default": 16 + }, + { + "Name": "block_size_y", + "Type": "int", + "Values": "[1, 2, 4, 8, 16, 32]", + "Default": 16 }, { 
"Name": "filter_height", @@ -19,16 +25,16 @@ "Default": 15 }, { - "Name": "block_size_x", + "Name": "filter_width", "Type": "int", - "Values": "[1, 2, 4, 8, 16, 32, 48, 64, 96, 112, 128]", - "Default": 16 + "Values": "[15]", + "Default": 15 }, { - "Name": "block_size_y", + "Name": "read_only", "Type": "int", - "Values": "[1, 2, 4, 8, 16, 32]", - "Default": 16 + "Values": "[0, 1]", + "Default": 0 }, { "Name": "tile_size_x", @@ -47,12 +53,6 @@ "Type": "int", "Values": "[0, 1]", "Default": 1 - }, - { - "Name": "read_only", - "Type": "int", - "Values": "[0, 1]", - "Default": 0 } ], "Conditions": [ @@ -143,7 +143,7 @@ "FillValue": 1.0 } ], - "SimulationInput": "cached_data_used/cachefiles/convolution/RTX_3090_T4.json", + "SimulationInput": "cached_data_used/cachefiles/convolution/RTX_3090.json", "Device": { "Name": "RTX_3090" } diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 49f2cde..28f353d 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -366,6 +366,12 @@ def generate_input_file(group: dict): with open(group["application_input_file"], "r", encoding="utf-8") as fp: input_json = json.load(fp) input_json["KernelSpecification"]["SimulationInput"] = str(group["full_search_space_file"]) + + # TODO dirty fix below for Kernel Tuner compatibility, instead implement reading T4 as cache in Kernel Tuner + input_json["KernelSpecification"]["SimulationInput"] = str( + input_json["KernelSpecification"]["SimulationInput"] + ).replace("_T4", "") + input_json["General"]["OutputFile"] = str(group["output_file"].parent.joinpath(group["output_file"].stem)) if input_json["General"]["OutputFormat"] != "JSON": raise RuntimeError( diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index fd9cd0f..6f9ac10 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -247,7 +247,21 @@ def tune_with_kerneltuner(): 
"""Interface with Kernel Tuner to tune the kernel and return the results.""" from kernel_tuner import tune_kernel_T1 - tune_kernel_T1(input_file) + samples = group["samples"] + + metadata, results = tune_kernel_T1( + input_file, simulation_mode=True, output_T4=True, iterations=samples, strategy_options=group["budget"] + ) + if "max_fevals" in group["budget"]: + max_fevals = group["budget"]["max_fevals"] + num_results = len(results["results"]) + if num_results < max_fevals * 0.1: + warnings.warn( + f"Much fewer configurations were returned ({num_results}) than the requested {max_fevals}" + ) + if num_results < 2: + raise ValueError("Less than two configurations were returned") + return metadata, results def tune_with_BAT(): """Interface to tune with the BAT benchmarking suite.""" @@ -380,12 +394,14 @@ def collect_results( # TODO put the tune options in the .json in strategy_defaults? Make it Kernel Tuner independent tune_options = {"verbose": False, "quiet": True, "simulation_mode": True} - def report_multiple_attempts(rep: int, len_res: int, group_repeats: int): + def report_multiple_attempts(rep: int, len_res: int, group_repeats: int, attempt: int): """If multiple attempts are necessary, report the reason.""" if len_res < 1: print(f"({rep+1}/{group_repeats}) No results found, trying once more...") elif len_res < min_num_evals: - print(f"Too few results found ({len_res} of {min_num_evals} required), trying once more...") + print( + f"Too few results found ({len_res} of {min_num_evals} required, attempt {attempt}), trying once more..." 
+ ) else: print(f"({rep+1}/{group_repeats}) Only invalid results found, trying once more...") @@ -412,7 +428,7 @@ def report_multiple_attempts(rep: int, len_res: int, group_repeats: int): len_res: int = -1 while only_invalid or len_res < min_num_evals: if attempt > 0: - report_multiple_attempts(rep, len_res, group["repeats"]) + report_multiple_attempts(rep, len_res, group["repeats"], attempt) _, results, total_time_ms = tune( input_file, results_description.application_name, @@ -422,6 +438,8 @@ def report_multiple_attempts(rep: int, len_res: int, group_repeats: int): profiling, searchspace_stats, ) + if attempt >= 10: + raise RuntimeError(f"Could not find enough results in {attempt} attempts, quiting...") len_res = len(results) # check if there are only invalid configs in the first min_num_evals, if so, try again temp_res_filtered = list(filter(lambda config: is_valid_config_result(config), results)) From 30c82c385e083d2df8d520f5eafaa767fda41c52 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 10 Oct 2024 23:32:58 -0700 Subject: [PATCH 017/234] Fixed an issue that led to groups being aliased instead of copied --- src/autotuning_methodology/experiments.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 28f353d..465209c 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -187,7 +187,7 @@ def generate_all_experimental_groups( for gpu in experimental_groups_defaults["gpus"]: for application in experimental_groups_defaults["applications"]: for strategy in search_strategies: - group = strategy + group = strategy.copy() for default in experimental_groups_defaults: if default not in group and default not in [ @@ -459,7 +459,10 @@ def execute_experiment(filepath: str, profiling: bool = False) -> tuple[dict, di print(f" | - | - | with settings of experimental group '{group['display_name']}'") 
# create SearchspaceStatistics for full search space file associated with this group, if it does not exist - if searchspace_statistics.get(group["gpu"]).get(group["application_name"]) is None: + if any( + searchspace_statistics.get(group["gpu"], {}).get(group["application_name"], {}) == null_val + for null_val in [None, {}] + ): full_search_space_file_path = None if group.get("converted_full_search_space_file") is None: full_search_space_file_path = group["full_search_space_file"] From ac9ed4007faaffd98f931587c2b5163361b44dec Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 10 Oct 2024 23:33:44 -0700 Subject: [PATCH 018/234] Improved the paths figures are saved to --- .../visualize_experiments.py | 43 +++++++++---------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 5cd28bd..574d46f 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -152,8 +152,8 @@ def __init__( # # silently execute the experiment # with warnings.catch_warnings(): # warnings.simplefilter("ignore") - self.experiment, self.all_experimental_groups, self.searchspace_statistics, self.results_descriptions = execute_experiment( - experiment_filepath, profiling=False + self.experiment, self.all_experimental_groups, self.searchspace_statistics, self.results_descriptions = ( + execute_experiment(experiment_filepath, profiling=False) ) experiment_folder: Path = self.experiment["parent_folder_absolute_path"] assert isinstance(experiment_folder, Path) @@ -330,10 +330,9 @@ def __init__( fig.supxlabel(self.get_x_axis_label(x_type, objective_time_keys)) fig.tight_layout() if save_figs: - filename = f"{self.plot_filename_prefix}/{title}_{x_type}" - filename = filename.replace(" ", "_") - fig.savefig(filename, dpi=300) - print(f"Figure saved to {filename}") + filename_path = 
Path(self.plot_filename_prefix) / f"{title}_{x_type}".replace(" ", "_") + fig.savefig(filename_path, dpi=300) + print(f"Figure saved to {filename_path}") else: plt.show() @@ -355,13 +354,14 @@ def __init__( fig.suptitle(title) # finalize the figure and save or display it - self.plot_strategies_aggregated(axs[0], aggregation_data, plot_settings=self.experiment["visualization_settings"]) + self.plot_strategies_aggregated( + axs[0], aggregation_data, plot_settings=self.experiment["visualization_settings"] + ) fig.tight_layout() if save_figs: - filename = f"{self.plot_filename_prefix}/aggregated" - filename = filename.replace(" ", "_") - fig.savefig(filename, dpi=300) - print(f"Figure saved to {filename}") + filename_path = Path(self.plot_filename_prefix) / "aggregated" + fig.savefig(filename_path, dpi=300) + print(f"Figure saved to {filename_path}") else: plt.show() @@ -445,10 +445,9 @@ def plot_baselines_comparison( # write to file or show if save_fig: - filename = f"{self.plot_filename_prefix}/{title}_baselines" - filename = filename.replace(" ", "_") - plt.savefig(filename, dpi=300) - print(f"Figure saved to {filename}") + filename_path = Path(self.plot_filename_prefix) / f"{title}_baselines".replace(" ", "_") + plt.savefig(filename_path, dpi=300) + print(f"Figure saved to {filename_path}") else: plt.show() @@ -532,10 +531,9 @@ def plot_split_times_comparison( # write to file or show if save_fig: - filename = f"{self.plot_filename_prefix}/{title}_split_times_{x_type}" - filename = filename.replace(" ", "_") - plt.savefig(filename, dpi=300) - print(f"Figure saved to {filename}") + filename_path = Path(self.plot_filename_prefix) / "{title}_split_times_{x_type}".replace(" ", "_") + plt.savefig(filename_path, dpi=300) + print(f"Figure saved to {filename_path}") else: plt.show() @@ -640,10 +638,9 @@ def plot_split_times_bar_comparison( # write to file or show if save_fig: - filename = f"{self.plot_filename_prefix}/{title}_split_times_bar" - filename = filename.replace(" 
", "_") - plt.savefig(filename, dpi=300) - print(f"Figure saved to {filename}") + filename_path = Path(self.plot_filename_prefix) / f"{title}_split_times_bar".replace(" ", "_") + plt.savefig(filename_path, dpi=300) + print(f"Figure saved to {filename_path}") else: plt.show() From a8d6506ded1e2a22655f484a343265f0554ce97f Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 10 Oct 2024 23:34:43 -0700 Subject: [PATCH 019/234] Various minor changes enabling compatibility with the new experiments, T1 and T4 formats --- .gitignore | 1 + .../setup/RTX_3090_convolution_dual_annealing_input.json | 8 +++++++- .../RTX_3090_convolution_genetic_algorithm_input.json | 8 +++++++- .../setup/RTX_3090_convolution_greedy_ils_input.json | 8 +++++++- src/autotuning_methodology/report_experiments.py | 4 +++- src/autotuning_methodology/runner.py | 1 + src/autotuning_methodology/schema.json | 2 +- src/autotuning_methodology/searchspace_statistics.py | 2 +- 8 files changed, 28 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 3f3f846..b6f1705 100755 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ cached_data_used/kernels/* cached_data_used/visualizations/* cached_data_used/last_run/* cached_data_used/import_runs/* +methodology_paper_evaluation/run/* # ignore setup files */setup/*_input.json diff --git a/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json b/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json index 681660f..0e413e1 100644 --- a/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json +++ b/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json @@ -150,5 +150,11 @@ }, "Search": { "Name": "dual_annealing" - } + }, + "Budget": [ + { + "Type": "ConfigurationCount", + "BudgetValue": 213 + } + ] } \ No newline at end of file diff --git a/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json 
b/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json index 69b34eb..aa383af 100644 --- a/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json +++ b/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json @@ -150,5 +150,11 @@ }, "Search": { "Name": "genetic_algorithm" - } + }, + "Budget": [ + { + "Type": "ConfigurationCount", + "BudgetValue": 213 + } + ] } \ No newline at end of file diff --git a/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json b/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json index 90cf636..9e8e586 100644 --- a/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json +++ b/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json @@ -150,5 +150,11 @@ }, "Search": { "Name": "greedy_ils" - } + }, + "Budget": [ + { + "Type": "ConfigurationCount", + "BudgetValue": 213 + } + ] } \ No newline at end of file diff --git a/src/autotuning_methodology/report_experiments.py b/src/autotuning_methodology/report_experiments.py index a391c89..101b17b 100644 --- a/src/autotuning_methodology/report_experiments.py +++ b/src/autotuning_methodology/report_experiments.py @@ -196,7 +196,9 @@ def get_strategy_scores(experiment_filepath: str, use_strategy_as_baseline=None) a dictionary of the strategies, with the performance score and error for each strategy. 
""" # execute the experiment if necessary, else retrieve it - experiment, strategies, searchspace_statistics, results_descriptions = execute_experiment(experiment_filepath, profiling=False) + experiment, strategies, searchspace_statistics, results_descriptions = execute_experiment( + experiment_filepath, profiling=False + ) experiment_folderpath = experiment["parent_folder_absolute_path"] # get the settings diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 6f9ac10..1dd5f5d 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -438,6 +438,7 @@ def report_multiple_attempts(rep: int, len_res: int, group_repeats: int, attempt profiling, searchspace_stats, ) + results = results["results"] if attempt >= 10: raise RuntimeError(f"Could not find enough results in {attempt} attempts, quiting...") len_res = len(results) diff --git a/src/autotuning_methodology/schema.json b/src/autotuning_methodology/schema.json index 9ff24a6..a904992 100755 --- a/src/autotuning_methodology/schema.json +++ b/src/autotuning_methodology/schema.json @@ -250,7 +250,7 @@ "type": "array", "items": { "enum": [ - "compilation_time", + "compilation", "runtimes", "framework", "search_algorithm", diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index d2203c7..e84dc8c 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -44,7 +44,7 @@ class SearchspaceStatistics: objective_performances_total_sorted_nan: np.ndarray T4_time_keys_to_kernel_tuner_time_keys_mapping = { - "compilation_time": "compile_time", + "compilation": "compile_time", "runtimes": "benchmark_time", "framework": "framework_time", "search_algorithm": "strategy_time", From 64036bb00561891660b36719140a7984093ffd14 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 11 Oct 2024 10:56:51 -0700 Subject: 
[PATCH 020/234] Silence warnings when tuning, added experiments file for comparing BO --- experiment_files/compare_bo.json | 72 ++++++++ ...3090_convolution_dual_annealing_input.json | 160 ------------------ ...0_convolution_genetic_algorithm_input.json | 160 ------------------ ...RTX_3090_convolution_greedy_ils_input.json | 160 ------------------ src/autotuning_methodology/runner.py | 8 +- 5 files changed, 77 insertions(+), 483 deletions(-) create mode 100644 experiment_files/compare_bo.json delete mode 100644 methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json delete mode 100644 methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json delete mode 100644 methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json diff --git a/experiment_files/compare_bo.json b/experiment_files/compare_bo.json new file mode 100644 index 0000000..3328abf --- /dev/null +++ b/experiment_files/compare_bo.json @@ -0,0 +1,72 @@ +{ + "version": "1.0.0", + "name": "Methodology paper evaluation", + "parent_folder": "./methodology_paper_evaluation", + "experimental_groups_defaults": { + "applications": [ + { + "name": "convolution", + "folder": "./cached_data_used/kernels", + "input_file": "convolution.json" + }, + { + "name": "pnpoly", + "folder": "./cached_data_used/kernels", + "input_file": "pnpoly.json" + } + ], + "gpus": [ + "RTX_3090", + "RTX_2080_Ti" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./cached_data_used/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 100, + "samples": 32, + "minimum_number_of_valid_search_iterations": 20, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "bayes_opt_og", + "search_method": "bayes_opt", + "display_name": "Bayesian Optimization", + "autotuner": "KernelTuner" + }, + { + "name": "dual_annealing", + "search_method": "dual_annealing", + "display_name": "Dual Annealing", + "autotuner": "KernelTuner" + } 
+ ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.96, + "cutoff_percentile_start": 0.5, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "time" + ] + }, + "visualization_settings": { + "x_axis_value_types": [ + "fevals", + "time", + "aggregated" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file diff --git a/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json b/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json deleted file mode 100644 index 0e413e1..0000000 --- a/methodology_paper_evaluation/setup/RTX_3090_convolution_dual_annealing_input.json +++ /dev/null @@ -1,160 +0,0 @@ -{ - "General": { - "BenchmarkName": "convolution", - "OutputFormat": "JSON", - "OutputFile": "/Users/fjwillemsen/University/PhD/OneDrive_Netherlands_eScience_Center/Projects/Bayesian Optimization in Kernel Tuner/Code/autotuning_methodology/methodology_paper_evaluation/run/dual_annealing/RTX_3090_convolution_dual_annealing" - }, - "ConfigurationSpace": { - "TuningParameters": [ - { - "Name": "block_size_x", - "Type": "int", - "Values": "[1, 2, 4, 8, 16, 32, 48, 64, 96, 112, 128]", - "Default": 16 - }, - { - "Name": "block_size_y", - "Type": "int", - "Values": "[1, 2, 4, 8, 16, 32]", - "Default": 16 - }, - { - "Name": "filter_height", - "Type": "int", - "Values": "[15]", - "Default": 15 - }, - { - "Name": "filter_width", - "Type": "int", - "Values": "[15]", - "Default": 15 - }, - { - "Name": "read_only", - "Type": "int", - "Values": "[0, 1]", - "Default": 0 - }, - { - "Name": "tile_size_x", - "Type": "int", - "Values": "[1, 2, 3, 4, 5, 6, 7, 8]", - "Default": 1 - }, - { - "Name": "tile_size_y", - "Type": "int", - "Values": "[1, 2, 3, 4, 5, 6, 7, 8]", - "Default": 1 - }, - { - "Name": 
"use_padding", - "Type": "int", - "Values": "[0, 1]", - "Default": 1 - } - ], - "Conditions": [ - { - "Expression": "block_size_x*block_size_y>=64", - "Parameters": [ - "block_size_x", - "block_size_y" - ] - }, - { - "Expression": "block_size_x*block_size_y>=1024", - "Parameters": [ - "block_size_x", - "block_size_y" - ] - }, - { - "Expression": "tile_size_x*tile_size_y<30", - "Parameters": [ - "tile_size_x", - "tile_size_y" - ] - } - ] - }, - "KernelSpecification": { - "Language": "CUDA", - "CompilerOptions": [ - "-std=c++11" - ], - "BenchmarkName": "GEMM", - "KernelName": "convolution_kernel", - "KernelFile": "/Users/fjwillemsen/University/PhD/OneDrive_Netherlands_eScience_Center/Projects/Bayesian Optimization in Kernel Tuner/Code/autotuning_methodology/cached_data_used/kernels/convolution.cu", - "GlobalSizeType": "CUDA", - "LocalSize": { - "X": "block_size_x", - "Y": "block_size_y", - "Z": "1" - }, - "GlobalSize": { - "X": "(262144 // block_size_x) // tile_size_x", - "Y": "(262144 // block_size_y) // tile_size_y", - "Z": "1" - }, - "GridDivX": [ - "block_size_x", - "tile_size_x" - ], - "GridDivY": [ - "block_size_y", - "tile_size_y" - ], - "ProblemSize": [ - 4096, - 4096 - ], - "SharedMemory": 0, - "Stream": null, - "Arguments": [ - { - "Name": "output_image", - "Type": "float", - "MemoryType": "Vector", - "AccessType": "WriteOnly", - "FillType": "Constant", - "Size": "ProblemSize[0]*ProblemSize[1]", - "FillValue": 0.0, - "Output": 1 - }, - { - "Name": "input_image", - "Type": "float", - "MemoryType": "Vector", - "AccessType": "ReadOnly", - "FillType": "Random", - "Size": "(ProblemSize[0]+max(filter_width)-1) * (ProblemSize[1]+max(filter_height)-1)", - "FillValue": 1.0 - }, - { - "Name": "d_filter", - "Type": "float", - "MemoryType": "Vector", - "AccessType": "ReadOnly", - "MemType": "Constant", - "FillType": "Random", - "Size": "max(filter_height) * max(filter_width)", - "FillValue": 1.0 - } - ], - "SimulationInput": 
"cached_data_used/cachefiles/convolution/RTX_3090.json", - "Device": { - "Name": "RTX_3090" - } - }, - "Search": { - "Name": "dual_annealing" - }, - "Budget": [ - { - "Type": "ConfigurationCount", - "BudgetValue": 213 - } - ] -} \ No newline at end of file diff --git a/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json b/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json deleted file mode 100644 index aa383af..0000000 --- a/methodology_paper_evaluation/setup/RTX_3090_convolution_genetic_algorithm_input.json +++ /dev/null @@ -1,160 +0,0 @@ -{ - "General": { - "BenchmarkName": "convolution", - "OutputFormat": "JSON", - "OutputFile": "/Users/fjwillemsen/University/PhD/OneDrive_Netherlands_eScience_Center/Projects/Bayesian Optimization in Kernel Tuner/Code/autotuning_methodology/methodology_paper_evaluation/run/genetic_algorithm/RTX_3090_convolution_genetic_algorithm" - }, - "ConfigurationSpace": { - "TuningParameters": [ - { - "Name": "block_size_x", - "Type": "int", - "Values": "[1, 2, 4, 8, 16, 32, 48, 64, 96, 112, 128]", - "Default": 16 - }, - { - "Name": "block_size_y", - "Type": "int", - "Values": "[1, 2, 4, 8, 16, 32]", - "Default": 16 - }, - { - "Name": "filter_height", - "Type": "int", - "Values": "[15]", - "Default": 15 - }, - { - "Name": "filter_width", - "Type": "int", - "Values": "[15]", - "Default": 15 - }, - { - "Name": "read_only", - "Type": "int", - "Values": "[0, 1]", - "Default": 0 - }, - { - "Name": "tile_size_x", - "Type": "int", - "Values": "[1, 2, 3, 4, 5, 6, 7, 8]", - "Default": 1 - }, - { - "Name": "tile_size_y", - "Type": "int", - "Values": "[1, 2, 3, 4, 5, 6, 7, 8]", - "Default": 1 - }, - { - "Name": "use_padding", - "Type": "int", - "Values": "[0, 1]", - "Default": 1 - } - ], - "Conditions": [ - { - "Expression": "block_size_x*block_size_y>=64", - "Parameters": [ - "block_size_x", - "block_size_y" - ] - }, - { - "Expression": "block_size_x*block_size_y>=1024", - 
"Parameters": [ - "block_size_x", - "block_size_y" - ] - }, - { - "Expression": "tile_size_x*tile_size_y<30", - "Parameters": [ - "tile_size_x", - "tile_size_y" - ] - } - ] - }, - "KernelSpecification": { - "Language": "CUDA", - "CompilerOptions": [ - "-std=c++11" - ], - "BenchmarkName": "GEMM", - "KernelName": "convolution_kernel", - "KernelFile": "/Users/fjwillemsen/University/PhD/OneDrive_Netherlands_eScience_Center/Projects/Bayesian Optimization in Kernel Tuner/Code/autotuning_methodology/cached_data_used/kernels/convolution.cu", - "GlobalSizeType": "CUDA", - "LocalSize": { - "X": "block_size_x", - "Y": "block_size_y", - "Z": "1" - }, - "GlobalSize": { - "X": "(262144 // block_size_x) // tile_size_x", - "Y": "(262144 // block_size_y) // tile_size_y", - "Z": "1" - }, - "GridDivX": [ - "block_size_x", - "tile_size_x" - ], - "GridDivY": [ - "block_size_y", - "tile_size_y" - ], - "ProblemSize": [ - 4096, - 4096 - ], - "SharedMemory": 0, - "Stream": null, - "Arguments": [ - { - "Name": "output_image", - "Type": "float", - "MemoryType": "Vector", - "AccessType": "WriteOnly", - "FillType": "Constant", - "Size": "ProblemSize[0]*ProblemSize[1]", - "FillValue": 0.0, - "Output": 1 - }, - { - "Name": "input_image", - "Type": "float", - "MemoryType": "Vector", - "AccessType": "ReadOnly", - "FillType": "Random", - "Size": "(ProblemSize[0]+max(filter_width)-1) * (ProblemSize[1]+max(filter_height)-1)", - "FillValue": 1.0 - }, - { - "Name": "d_filter", - "Type": "float", - "MemoryType": "Vector", - "AccessType": "ReadOnly", - "MemType": "Constant", - "FillType": "Random", - "Size": "max(filter_height) * max(filter_width)", - "FillValue": 1.0 - } - ], - "SimulationInput": "cached_data_used/cachefiles/convolution/RTX_3090.json", - "Device": { - "Name": "RTX_3090" - } - }, - "Search": { - "Name": "genetic_algorithm" - }, - "Budget": [ - { - "Type": "ConfigurationCount", - "BudgetValue": 213 - } - ] -} \ No newline at end of file diff --git 
a/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json b/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json deleted file mode 100644 index 9e8e586..0000000 --- a/methodology_paper_evaluation/setup/RTX_3090_convolution_greedy_ils_input.json +++ /dev/null @@ -1,160 +0,0 @@ -{ - "General": { - "BenchmarkName": "convolution", - "OutputFormat": "JSON", - "OutputFile": "/Users/fjwillemsen/University/PhD/OneDrive_Netherlands_eScience_Center/Projects/Bayesian Optimization in Kernel Tuner/Code/autotuning_methodology/methodology_paper_evaluation/run/greedy_ils/RTX_3090_convolution_greedy_ils" - }, - "ConfigurationSpace": { - "TuningParameters": [ - { - "Name": "block_size_x", - "Type": "int", - "Values": "[1, 2, 4, 8, 16, 32, 48, 64, 96, 112, 128]", - "Default": 16 - }, - { - "Name": "block_size_y", - "Type": "int", - "Values": "[1, 2, 4, 8, 16, 32]", - "Default": 16 - }, - { - "Name": "filter_height", - "Type": "int", - "Values": "[15]", - "Default": 15 - }, - { - "Name": "filter_width", - "Type": "int", - "Values": "[15]", - "Default": 15 - }, - { - "Name": "read_only", - "Type": "int", - "Values": "[0, 1]", - "Default": 0 - }, - { - "Name": "tile_size_x", - "Type": "int", - "Values": "[1, 2, 3, 4, 5, 6, 7, 8]", - "Default": 1 - }, - { - "Name": "tile_size_y", - "Type": "int", - "Values": "[1, 2, 3, 4, 5, 6, 7, 8]", - "Default": 1 - }, - { - "Name": "use_padding", - "Type": "int", - "Values": "[0, 1]", - "Default": 1 - } - ], - "Conditions": [ - { - "Expression": "block_size_x*block_size_y>=64", - "Parameters": [ - "block_size_x", - "block_size_y" - ] - }, - { - "Expression": "block_size_x*block_size_y>=1024", - "Parameters": [ - "block_size_x", - "block_size_y" - ] - }, - { - "Expression": "tile_size_x*tile_size_y<30", - "Parameters": [ - "tile_size_x", - "tile_size_y" - ] - } - ] - }, - "KernelSpecification": { - "Language": "CUDA", - "CompilerOptions": [ - "-std=c++11" - ], - "BenchmarkName": "GEMM", - "KernelName": 
"convolution_kernel", - "KernelFile": "/Users/fjwillemsen/University/PhD/OneDrive_Netherlands_eScience_Center/Projects/Bayesian Optimization in Kernel Tuner/Code/autotuning_methodology/cached_data_used/kernels/convolution.cu", - "GlobalSizeType": "CUDA", - "LocalSize": { - "X": "block_size_x", - "Y": "block_size_y", - "Z": "1" - }, - "GlobalSize": { - "X": "(262144 // block_size_x) // tile_size_x", - "Y": "(262144 // block_size_y) // tile_size_y", - "Z": "1" - }, - "GridDivX": [ - "block_size_x", - "tile_size_x" - ], - "GridDivY": [ - "block_size_y", - "tile_size_y" - ], - "ProblemSize": [ - 4096, - 4096 - ], - "SharedMemory": 0, - "Stream": null, - "Arguments": [ - { - "Name": "output_image", - "Type": "float", - "MemoryType": "Vector", - "AccessType": "WriteOnly", - "FillType": "Constant", - "Size": "ProblemSize[0]*ProblemSize[1]", - "FillValue": 0.0, - "Output": 1 - }, - { - "Name": "input_image", - "Type": "float", - "MemoryType": "Vector", - "AccessType": "ReadOnly", - "FillType": "Random", - "Size": "(ProblemSize[0]+max(filter_width)-1) * (ProblemSize[1]+max(filter_height)-1)", - "FillValue": 1.0 - }, - { - "Name": "d_filter", - "Type": "float", - "MemoryType": "Vector", - "AccessType": "ReadOnly", - "MemType": "Constant", - "FillType": "Random", - "Size": "max(filter_height) * max(filter_width)", - "FillValue": 1.0 - } - ], - "SimulationInput": "cached_data_used/cachefiles/convolution/RTX_3090.json", - "Device": { - "Name": "RTX_3090" - } - }, - "Search": { - "Name": "greedy_ils" - }, - "Budget": [ - { - "Type": "ConfigurationCount", - "BudgetValue": 213 - } - ] -} \ No newline at end of file diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 1dd5f5d..47adcdf 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -249,9 +249,11 @@ def tune_with_kerneltuner(): samples = group["samples"] - metadata, results = tune_kernel_T1( - input_file, simulation_mode=True, output_T4=True, 
iterations=samples, strategy_options=group["budget"] - ) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + metadata, results = tune_kernel_T1( + input_file, simulation_mode=True, output_T4=True, iterations=samples, strategy_options=group["budget"] + ) if "max_fevals" in group["budget"]: max_fevals = group["budget"]["max_fevals"] num_results = len(results["results"]) From 92ad96fe0ffa32d7dd120095089697b196b8af52 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 11 Oct 2024 17:13:05 -0700 Subject: [PATCH 021/234] Implemented experiments file generation --- src/autotuning_methodology/experiments.py | 43 ++++++++++++- .../experiments_defaults.json | 63 +++++++++++++++++++ .../integration/test_report.py | 4 +- 3 files changed, 105 insertions(+), 5 deletions(-) create mode 100644 src/autotuning_methodology/experiments_defaults.json diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 465209c..b282f9c 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -89,10 +89,10 @@ def get_experiment(filename: str) -> dict: assert path.exists(), f"Path to experiment file does not exist, attempted path: {path}, CWD: {getcwd()}" # get the path to the schema - schemafile = get_experiment_schema_filepath() + schemafile_path = get_experiment_schema_filepath() # open the experiment file and validate using the schema file - with open(path, "r", encoding="utf-8") as file, open(schemafile, "r", encoding="utf-8") as schemafile: + with path.open("r", encoding="utf-8") as file, schemafile_path.open("r", encoding="utf-8") as schemafile: schema = json.load(schemafile) experiment: dict = json.load(file) try: @@ -402,6 +402,43 @@ def generate_input_file(group: dict): json.dump(input_json, fp, indent=4) +def generate_experiment_file( + name: str, + parent_folder: Path, + applications: list[dict], + gpus: list[str], + search_strategies: list[dict], + override: dict 
= None, + overwrite_existing_file=False, +): + """Creates an experiment file based on the given inputs and opinionated defaults.""" + experiment_file_path = Path(f"./{name}.json") + if experiment_file_path.exists() and overwrite_existing_file is False: + raise FileExistsError(f"Experiments file '{experiment_file_path}' already exists") + defaults_path = Path(__file__).parent / "experiment_defaults.json" + with defaults_path.open() as fp: + experiment: dict = json.load(fp) + + # write the arguments to the experiment file + experiment["name"] = name + experiment["parent_folder"] = str(parent_folder.resolve()) + experiment["experimental_groups_defaults"]["applications"] = applications + experiment["experimental_groups_defaults"]["gpus"] = gpus + experiment["search_strategies"] = search_strategies + if override is not None: + experiment.update(override) + + # validate and write to experiments file + schemafile_path = get_experiment_schema_filepath() + with schemafile_path.open("r", encoding="utf-8") as schemafile: + validate(experiment, schemafile) + with experiment_file_path.open("w", encoding="utf-8") as fp: + json.dump(experiment, fp) + + # return the location of the experiments file + return experiment_file_path.resolve() + + def execute_experiment(filepath: str, profiling: bool = False) -> tuple[dict, dict, dict]: """Executes the experiment by retrieving it from the cache or running it. 
@@ -428,7 +465,7 @@ def execute_experiment(filepath: str, profiling: bool = False) -> tuple[dict, di # get the path to the schema schemafile = get_experiment_schema_filepath() # open the experiment file and validate using the schema file - with open(schemafile, "r", encoding="utf-8") as schemafile: + with schemafile.open("r", encoding="utf-8") as schemafile: schema = json.load(schemafile) objective_time_keys = schema["properties"]["statistics_settings"]["properties"]["objective_time_keys"][ "items" diff --git a/src/autotuning_methodology/experiments_defaults.json b/src/autotuning_methodology/experiments_defaults.json new file mode 100644 index 0000000..4fc9df3 --- /dev/null +++ b/src/autotuning_methodology/experiments_defaults.json @@ -0,0 +1,63 @@ +{ + "version": "1.0.0", + "name": "", + "parent_folder": ".", + "experimental_groups_defaults": { + "applications": [ + { + "name": "convolution", + "folder": "./cached_data_used/kernels", + "input_file": "convolution.json" + }, + { + "name": "pnpoly", + "folder": "./cached_data_used/kernels", + "input_file": "pnpoly.json" + } + ], + "gpus": [], + "pattern_for_full_search_space_filenames": { + "regex": "./cached_data_used/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 100, + "samples": 32, + "minimum_number_of_valid_search_iterations": 20, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "genetic_algorithm", + "search_method": "genetic_algorithm", + "display_name": "Genetic Algorithm", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.96, + "cutoff_percentile_start": 0.5, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "time" + ] + }, + "visualization_settings": { + "x_axis_value_types": [ + "fevals", + "time", + "aggregated" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ], + "resolution": 1000.0, + "confidence_level": 0.95, + 
"compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file diff --git a/tests/autotuning_methodology/integration/test_report.py b/tests/autotuning_methodology/integration/test_report.py index 521d56f..a0240e4 100644 --- a/tests/autotuning_methodology/integration/test_report.py +++ b/tests/autotuning_methodology/integration/test_report.py @@ -10,7 +10,7 @@ normal_cachefiles_path, ) -from autotuning_methodology.experiments import get_experiment, get_strategies +from autotuning_methodology.experiments import get_experiment, get_experimental_groups from autotuning_methodology.report_experiments import get_strategy_scores # setup file paths @@ -44,7 +44,7 @@ def test_visualize_experiment(): # get the experiment details experiment_filepath = str(experiment_filepath_test) experiment = get_experiment(experiment_filepath) - strategies = get_strategies(experiment) + strategies = get_experimental_groups(experiment) # TODO fix this test that used to use get_strategies # get the scores strategies_scores = get_strategy_scores(experiment_filepath) From d1b43109cf03a3976a097a6069f2bc4f3bd4c46f Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Wed, 16 Oct 2024 01:21:11 -0700 Subject: [PATCH 022/234] Generate an experiments file using provided values and defaults --- src/autotuning_methodology/experiments.py | 21 +- .../experiments_defaults.json | 11 +- src/autotuning_methodology/schema.json | 653 +++++++++--------- 3 files changed, 351 insertions(+), 334 deletions(-) diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index b282f9c..a36a51e 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -405,33 +405,38 @@ def generate_input_file(group: dict): def generate_experiment_file( name: str, parent_folder: Path, - applications: list[dict], - gpus: list[str], search_strategies: list[dict], + applications: list[dict] = None, + gpus: list[str] = 
None, override: dict = None, overwrite_existing_file=False, ): """Creates an experiment file based on the given inputs and opinionated defaults.""" - experiment_file_path = Path(f"./{name}.json") + assert isinstance(name, str) and len(name) > 0, f"Name for experiment file must be valid, is '{name}'" + experiment_file_path = Path(f"./{name.replace(' ', '_')}.json") if experiment_file_path.exists() and overwrite_existing_file is False: raise FileExistsError(f"Experiments file '{experiment_file_path}' already exists") - defaults_path = Path(__file__).parent / "experiment_defaults.json" + defaults_path = Path(__file__).parent / "experiments_defaults.json" with defaults_path.open() as fp: experiment: dict = json.load(fp) # write the arguments to the experiment file experiment["name"] = name experiment["parent_folder"] = str(parent_folder.resolve()) - experiment["experimental_groups_defaults"]["applications"] = applications - experiment["experimental_groups_defaults"]["gpus"] = gpus experiment["search_strategies"] = search_strategies + if applications is not None: + experiment["experimental_groups_defaults"]["applications"] = applications + if gpus is not None: + experiment["experimental_groups_defaults"]["gpus"] = gpus if override is not None: - experiment.update(override) + for key, value in override.items(): + experiment[key].update(value) # validate and write to experiments file schemafile_path = get_experiment_schema_filepath() with schemafile_path.open("r", encoding="utf-8") as schemafile: - validate(experiment, schemafile) + schema = json.load(schemafile) + validate(experiment, schema) with experiment_file_path.open("w", encoding="utf-8") as fp: json.dump(experiment, fp) diff --git a/src/autotuning_methodology/experiments_defaults.json b/src/autotuning_methodology/experiments_defaults.json index 4fc9df3..f4decc5 100644 --- a/src/autotuning_methodology/experiments_defaults.json +++ b/src/autotuning_methodology/experiments_defaults.json @@ -6,18 +6,21 @@ 
"applications": [ { "name": "convolution", - "folder": "./cached_data_used/kernels", + "folder": "../autotuning_methodology/cached_data_used/kernels", "input_file": "convolution.json" }, { "name": "pnpoly", - "folder": "./cached_data_used/kernels", + "folder": "../autotuning_methodology/cached_data_used/kernels", "input_file": "pnpoly.json" } ], - "gpus": [], + "gpus": [ + "RTX_3090", + "RTX_2080_Ti" + ], "pattern_for_full_search_space_filenames": { - "regex": "./cached_data_used/cachefiles/${applications}/${gpus}_T4.json" + "regex": "../autotuning_methodology/cached_data_used/cachefiles/${applications}/${gpus}_T4.json" }, "stochastic": true, "repeats": 100, diff --git a/src/autotuning_methodology/schema.json b/src/autotuning_methodology/schema.json index a904992..8eab3a6 100755 --- a/src/autotuning_methodology/schema.json +++ b/src/autotuning_methodology/schema.json @@ -1,339 +1,348 @@ { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://example.com/schemas/experiments/v0.1.1.schema.json", - "version": "1.0.0", - "title": "Experiment", - "description": "An experiment setup configuration file", - "type": "object", - "required": [ - "name", - "parent_folder", - "experimental_groups_defaults", - "search_strategies", - "statistics_settings", - "visualization_settings" - ], - "properties": { - "version": { - "description": "Version number of the experiment setup configuration file standard", - "type": "string" - }, - "name": { - "description": "Name of the experiment", - "type": "string" - }, - "parent folder": { - "description": "Absolute or relative path of the folder to store all related files for this experiment. 
This folder needs to already exist.", - "type": "string", - "default": "./" - }, - "experimental_groups_defaults": { - "description": "Default settings for experimental groups", - "type": "object", - "required": [ - "applications", - "gpus" - ], - "properties": { - "autotuner": { - "description": "Autotuner that will be used to tune the experimental group. Has to be specified either in experimental_groups_defaults or in experimental group.", - "enum": [ - "KernelTuner", - "KTT" - ] + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/schemas/experiments/v0.1.1.schema.json", + "version": "1.0.0", + "title": "Experiment", + "description": "An experiment setup configuration file", + "type": "object", + "required": [ + "name", + "parent_folder", + "experimental_groups_defaults", + "search_strategies", + "statistics_settings", + "visualization_settings" + ], + "properties": { + "version": { + "description": "Version number of the experiment setup configuration file standard", + "type": "string" + }, + "name": { + "description": "Name of the experiment", + "type": "string" }, - "autotuner_path": { - "description": "Path to the library of the autotuner", - "type": "string" + "parent folder": { + "description": "Absolute or relative path of the folder to store all related files for this experiment. This folder needs to already exist.", + "type": "string", + "default": "./" }, - "applications": { - "description": "List of applications for which measurements were taken and written to full search space files. 
Can be used in pattern_for_full_search_space_filename.", - "type": "array", - "items": { + "experimental_groups_defaults": { + "description": "Default settings for experimental groups", "type": "object", "required": [ - "name", - "input_file" + "applications", + "gpus" ], "properties": { - "name": { - "type": "string" - }, - "input_file": { - "type": "string" - } + "autotuner": { + "description": "Autotuner that will be used to tune the experimental group. Has to be specified either in experimental_groups_defaults or in experimental group.", + "enum": [ + "KernelTuner", + "KTT" + ] + }, + "autotuner_path": { + "description": "Path to the library of the autotuner", + "type": "string" + }, + "applications": { + "description": "List of applications for which measurements were taken and written to full search space files. Can be used in pattern_for_full_search_space_filename.", + "type": "array", + "items": { + "type": "object", + "required": [ + "name", + "input_file" + ], + "properties": { + "name": { + "type": "string" + }, + "input_file": { + "type": "string" + } + } + } + }, + "gpus": { + "description": "List of GPUs where measurements were taken and written to full search space files. 
Can be used in pattern_for_full_search_space_filename.", + "type": "array", + "items": { + "type": "string" + } + }, + "pattern_for_full_search_space_filenames": { + "description": "Pattern for filenames of full search space files", + "type": "object", + "required": [ + "regex" + ], + "properties": { + "regex": { + "type": "string", + "pattern": ".*.json", + "examples": [ + "${gpus}_${applications}_output.json", + "full-search-space-${applications}-${gpus}.json" + ] + }, + "regex_variables": { + "type": "array", + "items": { + "type": "string" + }, + "default": [ + "applications", + "gpus" + ] + } + } + }, + "stochastic": { + "description": "Whether the repeated runs of the same experimental group (combination of application, GPU and search strategy) exhibit stochastic behaviour, e.g. due to stochastic search strategy", + "type": "boolean", + "default": true + }, + "repeats": { + "description": "How many times to repeat the run for a single experimental group (combination of application, GPU and search strategy)", + "type": "integer", + "minimum": 1, + "default": 100 + }, + "samples": { + "description": "How many samples of measurements for a single configuration are present in full search space file", + "type": "integer", + "minimum": 1, + "default": 32 + }, + "minimum_number_of_valid_search_iterations": { + "description": "How many non-error, valid configurations account for a single run of search algorithm", + "type": "integer", + "minimum": 1, + "default": 20 + }, + "ignore_cache": { + "description": "If true, always re-run the experiments, even though results from previously executed experiments are stored in run folder.", + "type": "boolean", + "default": false + } } - } - }, - "gpus": { - "description": "List of GPUs where measurements were taken and written to full search space files. 
Can be used in pattern_for_full_search_space_filename.", - "type": "array", - "items": { - "type": "string" - } }, - "pattern_for_full_search_space_filenames": { - "description": "Pattern for filenames of full search space files", - "type": "object", - "required": [ - "regex" - ], - "properties": { - "regex": { - "type": "string", - "pattern": ".*.json", - "examples": [ - "${gpus}_${applications}_output.json", - "full-search-space-${applications}-${gpus}.json" - ] - }, - "regex_variables": { - "type": "array", - "items": { - "type": "string" - }, - "default": ["applications", "gpus"] - } - } - }, - "stochastic": { - "description": "Whether the repeated runs of the same experimental group (combination of application, GPU and search strategy) exhibit stochastic behaviour, e.g. due to stochastic search strategy", - "type": "boolean", - "default": true - }, - "repeats": { - "description": "How many times to repeat the run for a single experimental group (combination of application, GPU and search strategy)", - "type": "integer", - "minimum": 1, - "default": 100 - }, - "samples": { - "description": "How many samples of measurements for a single configuration are present in full search space file", - "type": "integer", - "minimum": 1, - "default": 32 - }, - "minimum_number_of_valid_search_iterations": { - "description": "How many non-error, valid configurations account for a single run of search algorithm", - "type": "integer", - "minimum": 1, - "default": 20 - }, - "ignore_cache": { - "description": "If true, always re-run the experiments, even though results from previously executed experiments are stored in run folder.", - "type": "boolean", - "default": false - } - } - }, - "search_strategies": { - "description": "Settings for search strategies", - "type": "array", - "minItems": 1, - "uniqueItems": true, - "items": { - "type": "object", - "required": [ - "name", - "search_method", - "display_name" - ], - "properties": { - "name": { - "description": "Name of the 
search strategy", - "type": "string" - }, - "autotuner": { - "description": "Autotuner that will be used for tuning. Has to be specified either in experimental_groups_defaults or in search_strategies.", - "enum": [ - "KernelTuner", - "KTT" - ] - }, - "autotuner_path": { - "description": "Path to the library of the autotuner", - "type": "string" - }, - "search_method": { - "description": "Name of the search method as recognized by the autotuner", - "type": "string" - }, - "search_method_hyperparameters": { - "description": "A list of hyperparameters for the search method as recognized by the autotuner", + "search_strategies": { + "description": "Settings for search strategies", "type": "array", + "minItems": 1, + "uniqueItems": true, "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "value": { - "type": "string" + "type": "object", + "required": [ + "name", + "search_method", + "display_name" + ], + "properties": { + "name": { + "description": "Name of the search strategy", + "type": "string" + }, + "autotuner": { + "description": "Autotuner that will be used for tuning. 
Has to be specified either in experimental_groups_defaults or in search_strategies.", + "enum": [ + "KernelTuner", + "KTT" + ] + }, + "autotuner_path": { + "description": "Path to the library of the autotuner", + "type": "string" + }, + "search_method": { + "description": "Name of the search method as recognized by the autotuner", + "type": "string" + }, + "search_method_hyperparameters": { + "description": "A list of hyperparameters for the search method as recognized by the autotuner", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "value": { + "type": [ + "number", + "string", + "boolean", + "object", + "array" + ] + } + } + } + }, + "display_name": { + "description": "Name for the search strategy used in visualizations", + "type": "string" + }, + "stochastic": { + "description": "Whether the repeated runs of the same experimental group (combination of application, GPU and search strategy) exhibit stochastic behaviour, e.g. 
due to stochastic search strategy", + "type": "boolean", + "default": true + }, + "repeats": { + "description": "How many times to repeat the run for a single experimental group (combination of application, GPU and search strategy)", + "type": "integer", + "minimum": 1, + "default": 100 + }, + "samples": { + "description": "How many samples of measurements for a single configuration are present in full search space file", + "type": "integer", + "minimum": 1, + "default": 32 + }, + "minimum_number_of_valid_search_iterations": { + "description": "How many non-error, valid configurations account for a single run of search strategy", + "type": "integer", + "minimum": 1, + "default": 20 + }, + "ignore_cache": { + "description": "If true, always re-run the experiments, even though results from previously executed experiments are stored in run folder.", + "type": "boolean", + "default": false + } } - } } - }, - "display_name": { - "description": "Name for the search strategy used in visualizations", - "type": "string" - }, - "stochastic": { - "description": "Whether the repeated runs of the same experimental group (combination of application, GPU and search strategy) exhibit stochastic behaviour, e.g. 
due to stochastic search strategy", - "type": "boolean", - "default": true - }, - "repeats": { - "description": "How many times to repeat the run for a single experimental group (combination of application, GPU and search strategy)", - "type": "integer", - "minimum": 1, - "default": 100 - }, - "samples": { - "description": "How many samples of measurements for a single configuration are present in full search space file", - "type": "integer", - "minimum": 1, - "default": 32 - }, - "minimum_number_of_valid_search_iterations": { - "description": "How many non-error, valid configurations account for a single run of search strategy", - "type": "integer", - "minimum": 1, - "default": 20 - }, - "ignore_cache": { - "description": "If true, always re-run the experiments, even though results from previously executed experiments are stored in run folder.", - "type": "boolean", - "default": false - } - } - } - }, - "statistics_settings": { - "description": "Settings for the statistics calculation", - "type": "object", - "required": [ - "minimization", - "cutoff_percentile", - "cutoff_percentile_start", - "cutoff_type", - "objective_time_keys", - "objective_performance_keys" - ], - "properties": { - "minimization": { - "description": "Whether the optimization aims to minimize or maximize", - "type": "boolean", - "default": true - }, - "cutoff_percentile": { - "description": "Fraction of difference between median and absolute optimum at which to stop the time range", - "type": "number", - "exclusiveMinimum": 0, - "maximum": 1 - }, - "cutoff_percentile_start": { - "description": "Fraction of difference between median and absolute optimum at which to start the time range", - "type": "number", - "minimum": 0, - "exclusiveMaximum": 1 - }, - "cutoff_type": { - "description": "Whether to base the cutoff on function evaluations or time", - "type": "string", - "enum": [ - "fevals", - "time" - ] }, - "objective_time_keys": { - "description": "Time key(s) to use as the time objective. 
In case of multiple keys, the values are summed.", - "type": "array", - "items": { - "enum": [ - "compilation", - "runtimes", - "framework", - "search_algorithm", - "validation", - "all" - ] - }, - "uniqueItems": true, - "minItems": 1 - }, - "objective_performance_keys": { - "description": "The performance key(s) to use as the performance objective. In case of multiple keys, the values are summed.", - "type": "array", - "items": { - "type": "string" - }, - "uniqueItems": true, - "minItems": 1 - } - } - }, - "visualization_settings": { - "description": "Settings for the visualizations", - "type": "object", - "required": [ - "resolution", - "x_axis_value_types", - "y_axis_value_types", - "confidence_level" - ], - "properties": { - "resolution": { - "description": "The resolution of the time range", - "type": "integer", - "minimum": 2 - }, - "x_axis_value_types": { - "description": "Types of value on the x-axis. Multiple values produces multiple (sub) plots.", - "type": "array", - "items": { - "type": "string", - "enum": [ - "fevals", - "time", - "aggregated" - ] - }, - "minItems": 1, - "uniqueItems": true - }, - "y_axis_value_types": { - "description": "Types of value on the y-axis. Multiple values produces multiple (sub) plots.", - "type": "array", - "items": { - "type": "string", - "enum": [ - "absolute", - "scatter", - "normalized", - "baseline" - ] - }, - "minItems": 1, - "uniqueItems": true - }, - "confidence_level": { - "description": "The confidence level used for the confidence / prediction interval, visualized as an error shade", - "type": "number", - "default": 0.95, - "exclusiveMinimum": 0, - "maximum": 1 - }, - "compare_baselines": { - "description": "[preview feature] Compare baselines to each other. 
Requires editing the baselines list in the `plot_baselines_comparison` function.", - "type": "boolean", - "default": false + "statistics_settings": { + "description": "Settings for the statistics calculation", + "type": "object", + "required": [ + "minimization", + "cutoff_percentile", + "cutoff_percentile_start", + "cutoff_type", + "objective_time_keys", + "objective_performance_keys" + ], + "properties": { + "minimization": { + "description": "Whether the optimization aims to minimize or maximize", + "type": "boolean", + "default": true + }, + "cutoff_percentile": { + "description": "Fraction of difference between median and absolute optimum at which to stop the time range", + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 + }, + "cutoff_percentile_start": { + "description": "Fraction of difference between median and absolute optimum at which to start the time range", + "type": "number", + "minimum": 0, + "exclusiveMaximum": 1 + }, + "cutoff_type": { + "description": "Whether to base the cutoff on function evaluations or time", + "type": "string", + "enum": [ + "fevals", + "time" + ] + }, + "objective_time_keys": { + "description": "Time key(s) to use as the time objective. In case of multiple keys, the values are summed.", + "type": "array", + "items": { + "enum": [ + "compilation", + "runtimes", + "framework", + "search_algorithm", + "validation", + "all" + ] + }, + "uniqueItems": true, + "minItems": 1 + }, + "objective_performance_keys": { + "description": "The performance key(s) to use as the performance objective. 
In case of multiple keys, the values are summed.", + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true, + "minItems": 1 + } + } }, - "compare_split_times": { - "description": "[preview feature] Plot a comparison of split times for strategies and baselines", - "type": "boolean", - "default": false + "visualization_settings": { + "description": "Settings for the visualizations", + "type": "object", + "required": [ + "resolution", + "x_axis_value_types", + "y_axis_value_types", + "confidence_level" + ], + "properties": { + "resolution": { + "description": "The resolution of the time range", + "type": "integer", + "minimum": 2 + }, + "x_axis_value_types": { + "description": "Types of value on the x-axis. Multiple values produces multiple (sub) plots.", + "type": "array", + "items": { + "type": "string", + "enum": [ + "fevals", + "time", + "aggregated" + ] + }, + "minItems": 1, + "uniqueItems": true + }, + "y_axis_value_types": { + "description": "Types of value on the y-axis. Multiple values produces multiple (sub) plots.", + "type": "array", + "items": { + "type": "string", + "enum": [ + "absolute", + "scatter", + "normalized", + "baseline" + ] + }, + "minItems": 1, + "uniqueItems": true + }, + "confidence_level": { + "description": "The confidence level used for the confidence / prediction interval, visualized as an error shade", + "type": "number", + "default": 0.95, + "exclusiveMinimum": 0, + "maximum": 1 + }, + "compare_baselines": { + "description": "[preview feature] Compare baselines to each other. 
Requires editing the baselines list in the `plot_baselines_comparison` function.", + "type": "boolean", + "default": false + }, + "compare_split_times": { + "description": "[preview feature] Plot a comparison of split times for strategies and baselines", + "type": "boolean", + "default": false + } + } } - } } - } -} +} \ No newline at end of file From 761f2c9054a9c9e234bc76217f0aa777b8ff3a36 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Tue, 22 Oct 2024 12:04:35 -0700 Subject: [PATCH 023/234] Will infer display name from strategy name if not specifed --- src/autotuning_methodology/curves.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/autotuning_methodology/curves.py b/src/autotuning_methodology/curves.py index fc32058..bd7ac5c 100644 --- a/src/autotuning_methodology/curves.py +++ b/src/autotuning_methodology/curves.py @@ -231,7 +231,10 @@ def __init__(self, results_description: ResultsDescription) -> None: """ # inputs self.name = results_description.group_name - self.display_name = results_description.group_display_name + try: + self.display_name = results_description.group_display_name + except AttributeError: + self.display_name = results_description.group_name.replace("_", " ").capitalize() self.device_name = results_description.device_name self.application_name = results_description.application_name self.stochastic = results_description.stochastic From 7b29cfdd8a7b457c1527ccd7fb5ba98d4897c9ae Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Tue, 22 Oct 2024 12:08:05 -0700 Subject: [PATCH 024/234] Cutoff percentiles are properly read from new format --- src/autotuning_methodology/visualize_experiments.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 574d46f..9de9b93 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ 
b/src/autotuning_methodology/visualize_experiments.py @@ -937,8 +937,8 @@ def plot_strategies_aggregated( print(f" | performance of {displayname}: {performance_score} (±{performance_score_std})") # set the axis - cutoff_percentile: float = self.experiment.get("cutoff_percentile", 1) - cutoff_percentile_start: float = self.experiment.get("cutoff_percentile_start", 0.01) + cutoff_percentile: float = self.experiment["statistics_settings"].get("cutoff_percentile", 1) + cutoff_percentile_start: float = self.experiment["statistics_settings"].get("cutoff_percentile_start", 0.01) ax.set_xlabel( f"{self.x_metric_displayname['aggregate_time']} ({cutoff_percentile_start*100}% to {cutoff_percentile*100}%)", # noqa: E501 fontsize="large", From 53b1e700ccfec4014370ef6cfad852952365c79e Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Tue, 22 Oct 2024 19:59:06 -0700 Subject: [PATCH 025/234] Simplified validation of experiments file format --- experiment_files/convert_old_to_new_format.py | 10 +---- .../methodology_paper_evaluation_new.json | 6 --- src/autotuning_methodology/experiments.py | 41 +++++-------------- src/autotuning_methodology/validators.py | 24 +++++++++++ .../integration/test_run_experiment.py | 15 ++----- 5 files changed, 40 insertions(+), 56 deletions(-) diff --git a/experiment_files/convert_old_to_new_format.py b/experiment_files/convert_old_to_new_format.py index 47d412b..eb2654b 100644 --- a/experiment_files/convert_old_to_new_format.py +++ b/experiment_files/convert_old_to_new_format.py @@ -2,19 +2,15 @@ import json from pathlib import Path -from jsonschema import validate - -from autotuning_methodology.experiments import get_experiment_schema_filepath +from autotuning_methodology.validators import validate_experimentsfile # set input and output files folderpath = Path(__file__).parent old_file_path = folderpath / Path("methodology_paper_evaluation.json") new_file_path = folderpath / Path("methodology_paper_evaluation_new.json") -schema_path = 
Path(get_experiment_schema_filepath()) encoding = "utf-8" assert old_file_path.exists(), f"Old file does not exist at {old_file_path}" assert not new_file_path.exists(), f"New file does already exists at {new_file_path}" -assert schema_path.exists(), f"Schema file does not exist at {schema_path}" # read input file to dictionary with old_file_path.open("r", encoding=encoding) as fp: @@ -75,9 +71,7 @@ } # validate using schema -with schema_path.open("r", encoding=encoding) as fp: - schema = json.load(fp) - validate(new_experiment, schema) +validate_experimentsfile(new_experiment, encoding=encoding) # write converted dictionary to file with new_file_path.open("w", encoding=encoding) as fp: diff --git a/experiment_files/methodology_paper_evaluation_new.json b/experiment_files/methodology_paper_evaluation_new.json index 252c82e..6e3816b 100644 --- a/experiment_files/methodology_paper_evaluation_new.json +++ b/experiment_files/methodology_paper_evaluation_new.json @@ -40,12 +40,6 @@ "search_method": "dual_annealing", "display_name": "Dual Annealing", "autotuner": "KernelTuner" - }, - { - "name": "greedy_ils", - "search_method": "greedy_ils", - "display_name": "Greedy ILS", - "autotuner": "KernelTuner" } ], "statistics_settings": { diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index a36a51e..84fda40 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -4,16 +4,16 @@ import json from argparse import ArgumentParser -from importlib.resources import files from math import ceil from os import getcwd, makedirs from pathlib import Path -from jsonschema import ValidationError, validate +from jsonschema import ValidationError from autotuning_methodology.caching import ResultsDescription from autotuning_methodology.runner import collect_results, convert_KTT_output_to_standard from autotuning_methodology.searchspace_statistics import SearchspaceStatistics +from 
autotuning_methodology.validators import validate_experimentsfile PACKAGE_ROOT = Path(__file__).parent.parent.parent @@ -41,17 +41,6 @@ def get_args_from_cli(args=None) -> str: return filepath -def get_experiment_schema_filepath(): - """Obtains and checks the filepath to the JSON schema. - - Returns: - the filepath to the schema in Traversable format. - """ - schemafile = files("autotuning_methodology").joinpath("schema.json") - assert schemafile.is_file(), f"Path to schema.json does not exist, attempted path: {schemafile}" - return schemafile - - def make_and_check_path(filename: str, parent=None, extension=None) -> Path: filename_path = Path(filename) if filename_path.is_absolute() is False and parent is not None: @@ -88,15 +77,11 @@ def get_experiment(filename: str) -> dict: # path = Path(filename) assert path.exists(), f"Path to experiment file does not exist, attempted path: {path}, CWD: {getcwd()}" - # get the path to the schema - schemafile_path = get_experiment_schema_filepath() - # open the experiment file and validate using the schema file - with path.open("r", encoding="utf-8") as file, schemafile_path.open("r", encoding="utf-8") as schemafile: - schema = json.load(schemafile) + with path.open("r", encoding="utf-8") as file: experiment: dict = json.load(file) try: - validate(instance=experiment, schema=schema) + validate_experimentsfile(experiment) return experiment except ValidationError as e: print(e) @@ -377,6 +362,8 @@ def generate_input_file(group: dict): raise RuntimeError( f"Only JSON output format is supported. Please set General.OutputFormat to JSON in {group['application_input_file']}." 
) + if "TimeUnit" not in input_json["General"]: + input_json["General"]["TimeUnit"] = "Milliseconds" if input_json["KernelSpecification"].get("Device") is None: input_json["KernelSpecification"]["Device"] = {} input_json["KernelSpecification"]["Device"]["Name"] = group["gpu"] @@ -433,10 +420,7 @@ def generate_experiment_file( experiment[key].update(value) # validate and write to experiments file - schemafile_path = get_experiment_schema_filepath() - with schemafile_path.open("r", encoding="utf-8") as schemafile: - schema = json.load(schemafile) - validate(experiment, schema) + validate_experimentsfile(experiment) with experiment_file_path.open("w", encoding="utf-8") as fp: json.dump(experiment, fp) @@ -467,14 +451,11 @@ def execute_experiment(filepath: str, profiling: bool = False) -> tuple[dict, di objective_time_keys: list[str] = experiment["statistics_settings"]["objective_time_keys"] if "all" in objective_time_keys: objective_time_keys = [] - # get the path to the schema - schemafile = get_experiment_schema_filepath() # open the experiment file and validate using the schema file - with schemafile.open("r", encoding="utf-8") as schemafile: - schema = json.load(schemafile) - objective_time_keys = schema["properties"]["statistics_settings"]["properties"]["objective_time_keys"][ - "items" - ]["enum"] + schema = validate_experimentsfile(experiment) + objective_time_keys = schema["properties"]["statistics_settings"]["properties"]["objective_time_keys"]["items"][ + "enum" + ] objective_time_keys.remove("all") experiment["statistics_settings"]["objective_time_keys"] = objective_time_keys diff --git a/src/autotuning_methodology/validators.py b/src/autotuning_methodology/validators.py index 1fcab0b..ebd0f74 100644 --- a/src/autotuning_methodology/validators.py +++ b/src/autotuning_methodology/validators.py @@ -1,11 +1,35 @@ """Module containing various checks for validity.""" +from importlib.resources import files +from json import load + import numpy as np +from 
jsonschema import validate error_types_strings = ["", "InvalidConfig", "CompilationFailedConfig", "RuntimeFailedConfig"] kernel_tuner_error_value = 1e20 +def get_experiment_schema_filepath(): + """Obtains and checks the filepath to the JSON schema. + + Returns: + the filepath to the schema in Traversable format. + """ + schemafile = files("autotuning_methodology").joinpath("schema.json") + assert schemafile.is_file(), f"Path to schema.json does not exist, attempted path: {schemafile}" + return schemafile + + +def validate_experimentsfile(instance: dict, encoding="utf-8") -> dict: + """Validates the passed instance against the T4 schema. Returns schema or throws ValidationError.""" + schemafile_path = get_experiment_schema_filepath() + with schemafile_path.open("r", encoding=encoding) as fp: + schema = load(fp) + validate(instance=instance, schema=schema) + return schema + + def is_invalid_objective_performance(objective_performance: float) -> bool: """Returns whether an objective value is invalid by checking against NaN and the error value. 
diff --git a/tests/autotuning_methodology/integration/test_run_experiment.py b/tests/autotuning_methodology/integration/test_run_experiment.py index 35b7220..79d3266 100644 --- a/tests/autotuning_methodology/integration/test_run_experiment.py +++ b/tests/autotuning_methodology/integration/test_run_experiment.py @@ -1,21 +1,15 @@ """Integration test for running and fetching an experiment from cache.""" -import json from importlib.resources import files from pathlib import Path from shutil import copyfile import numpy as np import pytest -from jsonschema import validate from autotuning_methodology.curves import StochasticOptimizationAlgorithm -from autotuning_methodology.experiments import ( - ResultsDescription, - execute_experiment, - get_args_from_cli, - get_experiment_schema_filepath, -) +from autotuning_methodology.experiments import ResultsDescription, execute_experiment, get_args_from_cli +from autotuning_methodology.validators import validate_experimentsfile # get the path to the package package_path = Path(files("autotuning_methodology")).parent.parent @@ -203,10 +197,7 @@ def validate_experiment_results( assert isinstance(results_descriptions, dict) # validate the contents - schemafilepath = get_experiment_schema_filepath() - with open(schemafilepath, "r", encoding="utf-8") as schemafile: - schema = json.load(schemafile) - validate(instance=experiment, schema=schema) + validate_experimentsfile(experiment) kernel_name = experiment["kernels"][0] gpu_name = experiment["GPUs"][0] assert len(strategies) == 1 From 71f0cbbb7b0e284ca30048131093c24e98ebf6bb Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Tue, 22 Oct 2024 20:32:19 -0700 Subject: [PATCH 026/234] Implemented T4 schema validator --- src/autotuning_methodology/schemas/T4.json | 87 +++++++++++++++++++ .../{schema.json => schemas/experiments.json} | 0 src/autotuning_methodology/validators.py | 33 +++++-- 3 files changed, 115 insertions(+), 5 deletions(-) create mode 100644 
src/autotuning_methodology/schemas/T4.json rename src/autotuning_methodology/{schema.json => schemas/experiments.json} (100%) diff --git a/src/autotuning_methodology/schemas/T4.json b/src/autotuning_methodology/schemas/T4.json new file mode 100644 index 0000000..fb7f620 --- /dev/null +++ b/src/autotuning_methodology/schemas/T4.json @@ -0,0 +1,87 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/odgaard/TuningSchema/blob/T4/results-schema.json", + "description": "Open Autotuning Results Schema", + "type": "object", + "properties": { + "schema_version": { + "description": "The version number of the schema in major.minor.patch format.", + "type": "string", + "pattern": "^[0-9]{1,}.[0-9]{1,}.[0-9]{1,}$", + "example": "1.0.0" + }, + "results": { + "type": "array", + "items": { + "type": "object", + "properties": { + "timestamp": { + "type": "string" + }, + "configuration": { + "type": "object" + }, + "objectives": { + "type": "array" + }, + "times": { + "type": "object", + "properties": { + "compilation_time": { + "type": "number" + }, + "runtimes": { + "type": "array" + }, + "framework": { + "type": "number" + }, + "search_algorithm": { + "type": "number" + }, + "validation": { + "type": "number" + } + } + }, + "invalidity": { + "enum": [ + "timeout", + "compile", + "runtime", + "correctness", + "constraints", + "correct" + ] + }, + "correctness": { + "type": "number" + }, + "measurements": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "value": { + "type": "number" + }, + "unit": { + "type": "string" + } + } + } + } + }, + "required": [ + "configuration", + "times", + "invalidity", + "correctness" + ] + } + } + } +} \ No newline at end of file diff --git a/src/autotuning_methodology/schema.json b/src/autotuning_methodology/schemas/experiments.json similarity index 100% rename from src/autotuning_methodology/schema.json rename to 
src/autotuning_methodology/schemas/experiments.json diff --git a/src/autotuning_methodology/validators.py b/src/autotuning_methodology/validators.py index ebd0f74..d6d4176 100644 --- a/src/autotuning_methodology/validators.py +++ b/src/autotuning_methodology/validators.py @@ -8,6 +8,7 @@ error_types_strings = ["", "InvalidConfig", "CompilationFailedConfig", "RuntimeFailedConfig"] kernel_tuner_error_value = 1e20 +schemas_path = files("autotuning_methodology").joinpath("schemas") def get_experiment_schema_filepath(): @@ -16,20 +17,42 @@ def get_experiment_schema_filepath(): Returns: the filepath to the schema in Traversable format. """ - schemafile = files("autotuning_methodology").joinpath("schema.json") - assert schemafile.is_file(), f"Path to schema.json does not exist, attempted path: {schemafile}" + schemafile = schemas_path.joinpath("experiments.json") + assert schemafile.is_file(), f"Path to experiments.json does not exist, attempted path: {schemafile}" return schemafile -def validate_experimentsfile(instance: dict, encoding="utf-8") -> dict: - """Validates the passed instance against the T4 schema. Returns schema or throws ValidationError.""" - schemafile_path = get_experiment_schema_filepath() +def get_T4_schema_filepath(): + """Obtains and checks the filepath to the JSON schema. + + Returns: + the filepath to the schema in Traversable format. + """ + schemafile = schemas_path.joinpath("T4.json") + assert schemafile.is_file(), f"Path to T4.json does not exist, attempted path: {schemafile}" + return schemafile + + +def validate_with_schema_path(instance: dict, schemafile_path, encoding: str) -> dict: + """Validates the passed instance against the passed schema path. 
Returns schema or throws ValidationError.""" with schemafile_path.open("r", encoding=encoding) as fp: schema = load(fp) validate(instance=instance, schema=schema) return schema +def validate_experimentsfile(instance: dict, encoding="utf-8") -> dict: + """Validates the passed instance against the experiments file schema. Returns schema or throws ValidationError.""" + schemafile_path = get_experiment_schema_filepath() + return validate_with_schema_path(instance, schemafile_path, encoding) + + +def validate_T4(instance: dict, encoding="utf-8") -> dict: + """Validates the passed instance against the T4 schema. Returns schema or throws ValidationError.""" + schemafile_path = get_T4_schema_filepath() + return validate_with_schema_path(instance, schemafile_path, encoding) + + def is_invalid_objective_performance(objective_performance: float) -> bool: """Returns whether an objective value is invalid by checking against NaN and the error value. From b9259a14fe83bc98960d597772dd9e1c6264699f Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Tue, 22 Oct 2024 20:33:44 -0700 Subject: [PATCH 027/234] Implemented conversion of time units based on T4 metadata --- .../searchspace_statistics.py | 43 ++++++++++++++----- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index e84dc8c..0f4e23d 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -9,7 +9,7 @@ import matplotlib.pyplot as plt import numpy as np -from autotuning_methodology.validators import is_invalid_objective_performance, is_invalid_objective_time +from autotuning_methodology.validators import is_invalid_objective_performance, is_invalid_objective_time, validate_T4 def nansumwrapper(array: np.ndarray, **kwargs) -> np.ndarray: @@ -28,6 +28,20 @@ def nansumwrapper(array: np.ndarray, **kwargs) -> np.ndarray: return 
summed_array +def convert_from_time_unit(value, from_unit: str): + """Convert the value from the specified time unit to seconds.""" + if from_unit is None or from_unit.lower() == "seconds": + return value + elif from_unit.lower() == "miliseconds": + return value / 1000 + elif from_unit.lower() == "microseconds": + return value / 1000000 + elif from_unit.lower() == "nanoseconds": + return value / 1000000000 + else: + raise ValueError(f"Conversion unit {from_unit} is not supported") + + class SearchspaceStatistics: """Object for obtaining information from a full search space file.""" @@ -250,7 +264,7 @@ def _is_not_invalid_value(self, value, performance: bool) -> bool: invalid_check_function = is_invalid_objective_performance if performance else is_invalid_objective_time return not invalid_check_function(value) - def _to_valid_array(self, results: list[dict], key: str, performance: bool) -> np.ndarray: + def _to_valid_array(self, results: list[dict], key: str, performance: bool, from_time_unit: str = None) -> np.ndarray: """Convert results performance or time values to a numpy array, sum if the input is a list of arrays.""" # make a list of all valid values if performance: @@ -258,14 +272,17 @@ def _to_valid_array(self, results: list[dict], key: str, performance: bool) -> n for r in results: for m in r["measurements"]: if key == m["name"]: - if self._is_not_invalid_value(m["value"], performance): - values.append(m["value"]) + val = m["value"] + if self._is_not_invalid_value(val, performance): + if len(m["unit"]) > 0: + val = convert_from_time_unit(val, m["unit"]) + values.append(val) else: values.append(np.nan) else: values = list( ( - v["times"][key] + convert_from_time_unit(v["times"][key], from_time_unit) if key in v["times"] and self._is_not_invalid_value(v["times"][key], performance) else np.nan ) @@ -288,15 +305,15 @@ def _to_valid_array(self, results: list[dict], key: str, performance: bool) -> n def _load(self) -> bool: """Load the contents of the full search 
space file.""" - # TODO check if the file is in KernelTuner format # if not, use a script to create a file with values from KTT output and formatting of KernelTuner filepath = self.get_valid_filepath() with open(filepath, "r", encoding="utf-8") as fh: print(f"Loading full search space file {filepath} and initializing the statistics...") + # get the cache from the .json file orig_contents = fh.read() try: - data = json.loads(orig_contents) + data: dict = json.loads(orig_contents) except json.decoder.JSONDecodeError: contents = orig_contents[:-1] + "}\n}" try: @@ -305,6 +322,11 @@ def _load(self) -> bool: contents = orig_contents[:-2] + "}\n}" data = json.loads(contents) + # validate it is in T4 format + validate_T4(data) + + metadata: dict = data.get("metadata", {}) + timeunit = metadata.get("timeunit", "seconds") results: dict = data["results"] self.results = results @@ -312,10 +334,9 @@ def _load(self) -> bool: self.size = len(data["results"]) self.objective_times = dict() for key in self.objective_time_keys: - self.objective_times[key] = self._to_valid_array(results, key, performance=False) - # self.objective_times[key] = ( - # self.objective_times[key] / 1000 - # ) # TODO Kernel Tuner specific miliseconds to seconds conversion + self.objective_times[key] = self._to_valid_array( + results, key, performance=False, from_time_unit=timeunit + ) # in runner.convert_KTT_output_to_standard all times get converted to ms assert ( self.objective_times[key].ndim == 1 From c647b87153b05c86cbf81860cd3380bae6feb93d Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Tue, 22 Oct 2024 20:52:28 -0700 Subject: [PATCH 028/234] Time unit conversion supports lists --- src/autotuning_methodology/schemas/T4.json | 5 ++++- src/autotuning_methodology/searchspace_statistics.py | 8 ++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/autotuning_methodology/schemas/T4.json b/src/autotuning_methodology/schemas/T4.json index fb7f620..f76b102 100644 --- 
a/src/autotuning_methodology/schemas/T4.json +++ b/src/autotuning_methodology/schemas/T4.json @@ -66,7 +66,10 @@ "type": "string" }, "value": { - "type": "number" + "type": [ + "number", + "string" + ] }, "unit": { "type": "string" diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index 0f4e23d..1e77e99 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -29,9 +29,11 @@ def nansumwrapper(array: np.ndarray, **kwargs) -> np.ndarray: def convert_from_time_unit(value, from_unit: str): - """Convert the value from the specified time unit to seconds.""" + """Convert the value or list of values from the specified time unit to seconds.""" if from_unit is None or from_unit.lower() == "seconds": return value + if isinstance(value, list): + return [convert_from_time_unit(v, from_unit) for v in value] elif from_unit.lower() == "miliseconds": return value / 1000 elif from_unit.lower() == "microseconds": @@ -264,7 +266,9 @@ def _is_not_invalid_value(self, value, performance: bool) -> bool: invalid_check_function = is_invalid_objective_performance if performance else is_invalid_objective_time return not invalid_check_function(value) - def _to_valid_array(self, results: list[dict], key: str, performance: bool, from_time_unit: str = None) -> np.ndarray: + def _to_valid_array( + self, results: list[dict], key: str, performance: bool, from_time_unit: str = None + ) -> np.ndarray: """Convert results performance or time values to a numpy array, sum if the input is a list of arrays.""" # make a list of all valid values if performance: From 69a98af9c03e262dadf719373b1099098eb0f2d0 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Tue, 22 Oct 2024 22:40:17 -0700 Subject: [PATCH 029/234] Generalized several useful Searchspace statistics functions --- .../searchspace_statistics.py | 106 +++++++++--------- 1 file changed, 52 insertions(+), 
54 deletions(-) diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index 1e77e99..292ef6b 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -44,6 +44,56 @@ def convert_from_time_unit(value, from_unit: str): raise ValueError(f"Conversion unit {from_unit} is not supported") +def is_not_invalid_value(value, performance: bool) -> bool: + """Checks if a performance or time value is an array or is not invalid.""" + if isinstance(value, str): + return False + if isinstance(value, (list, tuple, np.ndarray)): + return True + invalid_check_function = is_invalid_objective_performance if performance else is_invalid_objective_time + return not invalid_check_function(value) + + +def to_valid_array(results: list[dict], key: str, performance: bool, from_time_unit: str = None) -> np.ndarray: + """Convert results performance or time values to a numpy array, sum if the input is a list of arrays.""" + # make a list of all valid values + if performance: + values = list() + for r in results: + for m in r["measurements"]: + if key == m["name"]: + val = m["value"] + if is_not_invalid_value(val, performance): + if len(m["unit"]) > 0: + val = convert_from_time_unit(val, m["unit"]) + values.append(val) + else: + values.append(np.nan) + else: + values = list( + ( + convert_from_time_unit(v["times"][key], from_time_unit) + if key in v["times"] and is_not_invalid_value(v["times"][key], performance) + else np.nan + ) + for v in results + ) + # TODO other that time, performance such as power usage are in results["measurements"]. or not? 
+ # check if there are values that are arrays + for value_index, value in enumerate(values): + if isinstance(value, (list, tuple, np.ndarray)): + # if the value is an array, sum the valid values + array = value + list_to_sum = list(v for v in array if is_not_invalid_value(v, performance)) + values[value_index] = ( + sum(list_to_sum) + if len(list_to_sum) > 0 and is_not_invalid_value(sum(list_to_sum), performance) + else np.nan + ) + assert all(isinstance(v, (int, float)) for v in values) + return np.array(values) + + class SearchspaceStatistics: """Object for obtaining information from a full search space file.""" @@ -257,56 +307,6 @@ def get_valid_filepath(self) -> Path: ) return filepath - def _is_not_invalid_value(self, value, performance: bool) -> bool: - """Checks if a performance or time value is an array or is not invalid.""" - if isinstance(value, str): - return False - if isinstance(value, (list, tuple, np.ndarray)): - return True - invalid_check_function = is_invalid_objective_performance if performance else is_invalid_objective_time - return not invalid_check_function(value) - - def _to_valid_array( - self, results: list[dict], key: str, performance: bool, from_time_unit: str = None - ) -> np.ndarray: - """Convert results performance or time values to a numpy array, sum if the input is a list of arrays.""" - # make a list of all valid values - if performance: - values = list() - for r in results: - for m in r["measurements"]: - if key == m["name"]: - val = m["value"] - if self._is_not_invalid_value(val, performance): - if len(m["unit"]) > 0: - val = convert_from_time_unit(val, m["unit"]) - values.append(val) - else: - values.append(np.nan) - else: - values = list( - ( - convert_from_time_unit(v["times"][key], from_time_unit) - if key in v["times"] and self._is_not_invalid_value(v["times"][key], performance) - else np.nan - ) - for v in results - ) - # TODO other that time, performance such as power usage are in results["measurements"]. or not? 
- # check if there are values that are arrays - for value_index, value in enumerate(values): - if isinstance(value, (list, tuple, np.ndarray)): - # if the value is an array, sum the valid values - array = value - list_to_sum = list(v for v in array if self._is_not_invalid_value(v, performance)) - values[value_index] = ( - sum(list_to_sum) - if len(list_to_sum) > 0 and self._is_not_invalid_value(sum(list_to_sum), performance) - else np.nan - ) - assert all(isinstance(v, (int, float)) for v in values) - return np.array(values) - def _load(self) -> bool: """Load the contents of the full search space file.""" # if not, use a script to create a file with values from KTT output and formatting of KernelTuner @@ -338,9 +338,7 @@ def _load(self) -> bool: self.size = len(data["results"]) self.objective_times = dict() for key in self.objective_time_keys: - self.objective_times[key] = self._to_valid_array( - results, key, performance=False, from_time_unit=timeunit - ) + self.objective_times[key] = to_valid_array(results, key, performance=False, from_time_unit=timeunit) # in runner.convert_KTT_output_to_standard all times get converted to ms assert ( self.objective_times[key].ndim == 1 @@ -356,7 +354,7 @@ def _load(self) -> bool: # get the performance values per configuration self.objective_performances = dict() for key in self.objective_performance_keys: - self.objective_performances[key] = self._to_valid_array(results, key, performance=True) + self.objective_performances[key] = to_valid_array(results, key, performance=True) assert ( self.objective_performances[key].ndim == 1 ), f"Should have one dimension, has {self.objective_performances[key].ndim}" From fd8f319a69f55789e44449d28b02d547592dd8c0 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Tue, 22 Oct 2024 23:09:46 -0700 Subject: [PATCH 030/234] Completed conversion of time based on units --- src/autotuning_methodology/runner.py | 14 +++++++++++++- .../searchspace_statistics.py | 17 +++++++++++------ 2 files 
changed, 24 insertions(+), 7 deletions(-) diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 47adcdf..32f911d 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -16,11 +16,12 @@ import yappi from autotuning_methodology.caching import ResultsDescription -from autotuning_methodology.searchspace_statistics import SearchspaceStatistics +from autotuning_methodology.searchspace_statistics import SearchspaceStatistics, convert_from_time_unit from autotuning_methodology.validators import ( is_invalid_objective_performance, is_invalid_objective_time, is_valid_config_result, + validate_T4, ) # TODO this does not conform to new intended dicrectory structure @@ -369,7 +370,18 @@ def get_KTT_results_and_metadata(output_filename: str) -> tuple[dict, list, floa total_time_ms = round((total_end_time - total_start_time) * 1000) else: raise ValueError(f"Invalid autotuning framework '{group['autotuner']}'") + + # convert time units + timeunit: str = results.get("metadata", {}).get("timeunit", "seconds") + for result in results["results"]: + for k, v in result["times"].items(): + result["times"][k] = convert_from_time_unit(v, timeunit) + for i, m in enumerate(result["measurements"]): + if "unit" in m and not isinstance(m["value"], str): + result["measurements"][i]["value"] = convert_from_time_unit(m["value"], m["unit"]) + # be careful not to rely on total_time_ms when profiling, because it will include profiling time + validate_T4(results) return metadata, results, total_time_ms diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index 292ef6b..40494d1 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -30,15 +30,20 @@ def nansumwrapper(array: np.ndarray, **kwargs) -> np.ndarray: def convert_from_time_unit(value, from_unit: str): """Convert the value or 
list of values from the specified time unit to seconds.""" - if from_unit is None or from_unit.lower() == "seconds": - return value - if isinstance(value, list): + if from_unit is None: + return None + elif isinstance(value, list): return [convert_from_time_unit(v, from_unit) for v in value] - elif from_unit.lower() == "miliseconds": + elif not isinstance(value, (int, float, complex)): + return value + unit = from_unit.lower() + if unit == "seconds" or unit == "s": + return value + elif unit == "miliseconds" or unit == "ms": return value / 1000 - elif from_unit.lower() == "microseconds": + elif unit == "microseconds": return value / 1000000 - elif from_unit.lower() == "nanoseconds": + elif unit == "nanoseconds" or unit == "ns": return value / 1000000000 else: raise ValueError(f"Conversion unit {from_unit} is not supported") From 1b0ca47ebc9bf1ed7e06efe0d8942ac94801d0e2 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Wed, 23 Oct 2024 00:53:58 -0700 Subject: [PATCH 031/234] Baseline visualization in absolute plot, disabled automatic conversion of objective performance --- src/autotuning_methodology/runner.py | 8 ++++---- src/autotuning_methodology/searchspace_statistics.py | 5 +++-- src/autotuning_methodology/visualize_experiments.py | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 32f911d..d8bec01 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -305,7 +305,6 @@ def tune_with_KTT(): except subprocess.CalledProcessError as er: print(er.stdout) print(er.stderr) - pass # remove the modified input file, output file was written in experiment_parent_folder/run/group_name/ subprocess.run(["rm", group["input_file"].name], check=False) if profiling: @@ -376,9 +375,10 @@ def get_KTT_results_and_metadata(output_filename: str) -> tuple[dict, list, floa for result in results["results"]: for k, v in result["times"].items(): 
result["times"][k] = convert_from_time_unit(v, timeunit) - for i, m in enumerate(result["measurements"]): - if "unit" in m and not isinstance(m["value"], str): - result["measurements"][i]["value"] = convert_from_time_unit(m["value"], m["unit"]) + # performance should not be auto-converted + # for i, m in enumerate(result["measurements"]): + # if "unit" in m and not isinstance(m["value"], str): + # result["measurements"][i]["value"] = convert_from_time_unit(m["value"], m["unit"]) # be careful not to rely on total_time_ms when profiling, because it will include profiling time validate_T4(results) diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index 40494d1..9770b38 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -69,8 +69,9 @@ def to_valid_array(results: list[dict], key: str, performance: bool, from_time_u if key == m["name"]: val = m["value"] if is_not_invalid_value(val, performance): - if len(m["unit"]) > 0: - val = convert_from_time_unit(val, m["unit"]) + # performance should not be auto-converted + # if len(m["unit"]) > 0: + # val = convert_from_time_unit(val, m["unit"]) values.append(val) else: values.append(np.nan) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 9de9b93..726a3e9 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -700,7 +700,7 @@ def normalize_multiple(curves: list) -> tuple: if baseline_curve is not None: if y_type == "baseline": ax.axhline(0, label="baseline trajectory", color="black", ls="--") - elif y_type == "normalized" or y_type == "baseline": + elif y_type == "normalized" or y_type == "baseline" or y_type == "absolute": baseline = baseline_curve.get_curve(x_axis_range, x_type) if y_type == "normalized": baseline = normalize(baseline) 
From d9bf8bccb161e7f021bd14a156e22c64682d1ddc Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Wed, 23 Oct 2024 02:26:15 -0700 Subject: [PATCH 032/234] Improvement to reporting non-overlapping time range --- src/autotuning_methodology/curves.py | 8 ++++---- src/autotuning_methodology/experiments_defaults.json | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/autotuning_methodology/curves.py b/src/autotuning_methodology/curves.py index bd7ac5c..df5a711 100644 --- a/src/autotuning_methodology/curves.py +++ b/src/autotuning_methodology/curves.py @@ -715,12 +715,12 @@ def _get_curve_over_time_values_in_range( # filter to get the time range with a margin on both ends for the isotonic regression time_range_margin = 0.1 - range_mask_margin = (time_range[0] * (1 - time_range_margin) <= times) & ( - times <= time_range[-1] * (1 + time_range_margin) - ) + time_range_start = time_range[0] * (1 - time_range_margin) + time_range_end = time_range[-1] * (1 + time_range_margin) + range_mask_margin = (time_range_start <= times) & (times <= time_range_end) assert np.all( np.count_nonzero(range_mask_margin, axis=0) > 1 - ), "Not enough overlap in time range and time values" + ), f"Not enough overlap in time range and time values: should be {time_range_start=} <= {times} <= {time_range_end=}" times = np.where(range_mask_margin, times, np.nan) values = np.where(range_mask_margin, values, np.nan) num_repeats = values.shape[1] diff --git a/src/autotuning_methodology/experiments_defaults.json b/src/autotuning_methodology/experiments_defaults.json index f4decc5..7020b8f 100644 --- a/src/autotuning_methodology/experiments_defaults.json +++ b/src/autotuning_methodology/experiments_defaults.json @@ -23,7 +23,7 @@ "regex": "../autotuning_methodology/cached_data_used/cachefiles/${applications}/${gpus}_T4.json" }, "stochastic": true, - "repeats": 100, + "repeats": 10, "samples": 32, "minimum_number_of_valid_search_iterations": 20, "ignore_cache": false From 
31bd3bb0b2bfbce3f45623079268aaabd3855316 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 25 Oct 2024 15:14:15 -0700 Subject: [PATCH 033/234] Revamped plots specification in experiments files --- .gitignore | 1 + .../methodology_paper_evaluation_new.json | 37 +++- .../experiments_defaults.json | 39 +++- .../schemas/experiments.json | 99 +++++++--- .../visualize_experiments.py | 183 ++++++++++-------- 5 files changed, 226 insertions(+), 133 deletions(-) diff --git a/.gitignore b/.gitignore index b6f1705..2b9c59e 100755 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ cached_data_used/visualizations/* cached_data_used/last_run/* cached_data_used/import_runs/* methodology_paper_evaluation/run/* +hyperparametertuning/* # ignore setup files */setup/*_input.json diff --git a/experiment_files/methodology_paper_evaluation_new.json b/experiment_files/methodology_paper_evaluation_new.json index 6e3816b..462431e 100644 --- a/experiment_files/methodology_paper_evaluation_new.json +++ b/experiment_files/methodology_paper_evaluation_new.json @@ -1,5 +1,5 @@ { - "version": "1.0.0", + "version": "1.1.0", "name": "Methodology paper evaluation", "parent_folder": "./methodology_paper_evaluation", "experimental_groups_defaults": { @@ -55,14 +55,33 @@ ] }, "visualization_settings": { - "x_axis_value_types": [ - "fevals", - "time", - "aggregated" - ], - "y_axis_value_types": [ - "normalized", - "baseline" + "plots": [ + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "aggregate", + "style": "line" + } ], "resolution": 1000.0, "confidence_level": 0.95, diff --git a/src/autotuning_methodology/experiments_defaults.json b/src/autotuning_methodology/experiments_defaults.json index 7020b8f..c0c1b41 100644 
--- a/src/autotuning_methodology/experiments_defaults.json +++ b/src/autotuning_methodology/experiments_defaults.json @@ -1,5 +1,5 @@ { - "version": "1.0.0", + "version": "1.1.0", "name": "", "parent_folder": ".", "experimental_groups_defaults": { @@ -23,7 +23,7 @@ "regex": "../autotuning_methodology/cached_data_used/cachefiles/${applications}/${gpus}_T4.json" }, "stochastic": true, - "repeats": 10, + "repeats": 25, "samples": 32, "minimum_number_of_valid_search_iterations": 20, "ignore_cache": false @@ -49,14 +49,33 @@ ] }, "visualization_settings": { - "x_axis_value_types": [ - "fevals", - "time", - "aggregated" - ], - "y_axis_value_types": [ - "normalized", - "baseline" + "plots": [ + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "aggregate", + "style": "line" + } ], "resolution": 1000.0, "confidence_level": 0.95, diff --git a/src/autotuning_methodology/schemas/experiments.json b/src/autotuning_methodology/schemas/experiments.json index 8eab3a6..2328be7 100755 --- a/src/autotuning_methodology/schemas/experiments.json +++ b/src/autotuning_methodology/schemas/experiments.json @@ -1,7 +1,7 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://example.com/schemas/experiments/v0.1.1.schema.json", - "version": "1.0.0", + "version": "1.1.0", "title": "Experiment", "description": "An experiment setup configuration file", "type": "object", @@ -285,46 +285,81 @@ "description": "Settings for the visualizations", "type": "object", "required": [ + "plots", "resolution", - "x_axis_value_types", - "y_axis_value_types", "confidence_level" ], "properties": { + "plots": { + "description": "Specification of the plots to produce.", + "type": "array", + "items": { + "type": "object", + 
"required": [ + "scope", + "style" + ], + "properties": { + "scope": { + "description": "The scope of the plot: whether it's a plot per searchspace, per search strategy, or the aggregate.", + "type": "string", + "enum": [ + "searchspace", + "search_strategy", + "aggregate" + ] + }, + "style": { + "description": "The type of plot.", + "type": "string", + "enum": [ + "line", + "scatter", + "heatmap" + ] + }, + "x_axis_value_types": { + "description": "Types of value on the x-axis. Multiple values may produce multiple (sub) plots.", + "type": "array", + "items": { + "type": "string", + "enum": [ + "fevals", + "time", + "applications", + "gpus", + "searchspaces" + ] + }, + "minItems": 1, + "uniqueItems": true + }, + "y_axis_value_types": { + "description": "Types of value on the y-axis. Multiple values may produce multiple (sub) plots.", + "type": "array", + "items": { + "type": "string", + "enum": [ + "absolute", + "normalized", + "baseline", + "applications", + "gpus", + "searchspaces" + ] + }, + "minItems": 1, + "uniqueItems": true + } + } + } + }, "resolution": { "description": "The resolution of the time range", "type": "integer", + "default": 1000, "minimum": 2 }, - "x_axis_value_types": { - "description": "Types of value on the x-axis. Multiple values produces multiple (sub) plots.", - "type": "array", - "items": { - "type": "string", - "enum": [ - "fevals", - "time", - "aggregated" - ] - }, - "minItems": 1, - "uniqueItems": true - }, - "y_axis_value_types": { - "description": "Types of value on the y-axis. 
Multiple values produces multiple (sub) plots.", - "type": "array", - "items": { - "type": "string", - "enum": [ - "absolute", - "scatter", - "normalized", - "baseline" - ] - }, - "minItems": 1, - "uniqueItems": true - }, "confidence_level": { "description": "The confidence level used for the confidence / prediction interval, visualized as an error shade", "type": "number", diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 726a3e9..1056a84 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -181,8 +181,7 @@ def __init__( objective_time_keys: list[str] = self.experiment["statistics_settings"]["objective_time_keys"] # plot settings - plot_x_value_types: list[str] = self.experiment["visualization_settings"]["x_axis_value_types"] - plot_y_value_types: list[str] = self.experiment["visualization_settings"]["y_axis_value_types"] + plots: list[dict] = self.experiment["visualization_settings"]["plots"] compare_baselines: bool = self.experiment["visualization_settings"]["compare_baselines"] compare_split_times: bool = self.experiment["visualization_settings"]["compare_split_times"] confidence_level: float = self.experiment["visualization_settings"]["confidence_level"] @@ -209,6 +208,8 @@ def __init__( time_resolution, use_strategy_as_baseline, ) + + # plot per searchspace for gpu_name in self.experiment["experimental_groups_defaults"]["gpus"]: for application_name in self.experiment["experimental_groups_defaults"]["applications_names"]: print(f" | visualizing optimization of {application_name} for {gpu_name}") @@ -282,88 +283,104 @@ def __init__( # ) # ) - # visualize the results - for x_type in plot_x_value_types: - if x_type == "aggregated": + for plot in plots: + # get settings + scope: str = plot["scope"] + if scope != "searchspace": continue - elif x_type == "fevals": - x_axis_range = fevals_range - elif x_type == "time": - 
x_axis_range = time_range - else: - raise ValueError(f"Invalid {x_type=}") - - # create the figure and plots - fig, axs = plt.subplots( - nrows=len(plot_y_value_types), - ncols=1, - figsize=(8, 3.4 * len(plot_y_value_types)), - sharex=True, - dpi=300, - ) - if not hasattr( - axs, "__len__" - ): # if there is just one subplot, wrap it in a list so it can be passed to the plot functions - axs = [axs] - fig.canvas.manager.set_window_title(title) - if not save_figs: - fig.suptitle(title) - - # plot the subplots of individual searchspaces - for index, y_type in enumerate(plot_y_value_types): - self.plot_strategies( - x_type, - y_type, - axs[index], - searchspace_stats, - strategies_curves, - x_axis_range, - self.experiment["visualization_settings"], - random_baseline, - baselines_extra=baselines_extra, + style: str = plot["style"] + plot_x_value_types: list[str] = plot["x_axis_value_types"] + plot_y_value_types: list[str] = plot["y_axis_value_types"] + + # visualize the results + for x_type in plot_x_value_types: + if x_type == "fevals": + x_axis_range = fevals_range + elif x_type == "time": + x_axis_range = time_range + else: + raise ValueError(f"X-axis type '{x_type}' not supported for scope '{plot}'") + + # create the figure and plots + fig, axs = plt.subplots( + nrows=len(plot_y_value_types), + ncols=1, + figsize=(8, 3.4 * len(plot_y_value_types)), + sharex=True, + dpi=300, ) - if index == 0: - loc = "lower right" if y_type == "normalized" else "best" - axs[index].legend(loc=loc) - - # finalize the figure and save or display it - fig.supxlabel(self.get_x_axis_label(x_type, objective_time_keys)) - fig.tight_layout() - if save_figs: - filename_path = Path(self.plot_filename_prefix) / f"{title}_{x_type}".replace(" ", "_") - fig.savefig(filename_path, dpi=300) - print(f"Figure saved to {filename_path}") - else: - plt.show() + if not hasattr( + axs, "__len__" + ): # if there is just one subplot, wrap it in a list so it can be passed to the plot functions + axs = [axs] + 
fig.canvas.manager.set_window_title(title) + if not save_figs: + fig.suptitle(title) + + # plot the subplots of individual searchspaces + for index, y_type in enumerate(plot_y_value_types): + self.plot_strategies( + style, + x_type, + y_type, + axs[index], + searchspace_stats, + strategies_curves, + x_axis_range, + self.experiment["visualization_settings"], + random_baseline, + baselines_extra=baselines_extra, + ) + if index == 0: + loc = "lower right" if y_type == "normalized" else "best" + axs[index].legend(loc=loc) + + # finalize the figure and save or display it + fig.supxlabel(self.get_x_axis_label(x_type, objective_time_keys)) + fig.tight_layout() + if save_figs: + filename_path = Path(self.plot_filename_prefix) / f"{title}_{x_type}".replace(" ", "_") + fig.savefig(filename_path, dpi=300) + print(f"Figure saved to {filename_path}") + else: + plt.show() + + # plot per searchstrategy + # TODO # plot the aggregated searchspaces - if ( - "aggregated" in plot_x_value_types - and continue_after_comparison - or not (compare_baselines or compare_split_times) - ): - fig, axs = plt.subplots( - ncols=1, figsize=(9, 6), dpi=300 - ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. 
- if not hasattr(axs, "__len__"): - axs = [axs] - title = f"""Aggregated Data\napplications: - {', '.join(self.experiment['experimental_groups_defaults']['applications_names'])}\nGPUs: {', '.join(self.experiment['experimental_groups_defaults']['gpus'])}""" - fig.canvas.manager.set_window_title(title) - if not save_figs: - fig.suptitle(title) - - # finalize the figure and save or display it - self.plot_strategies_aggregated( - axs[0], aggregation_data, plot_settings=self.experiment["visualization_settings"] - ) - fig.tight_layout() - if save_figs: - filename_path = Path(self.plot_filename_prefix) / "aggregated" - fig.savefig(filename_path, dpi=300) - print(f"Figure saved to {filename_path}") - else: - plt.show() + for plot in plots: + # get settings + scope: str = plot["scope"] + style: str = plot["style"] + if scope != "aggregate": + continue + if style != "line": + raise ValueError(f"Aggregated only supports 'line' as a style, not {style}") + # plot the aggregation + if continue_after_comparison or not (compare_baselines or compare_split_times): + fig, axs = plt.subplots( + ncols=1, figsize=(9, 6), dpi=300 + ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. 
+ if not hasattr(axs, "__len__"): + axs = [axs] + title = f"""Aggregated Data\napplications: + {', '.join(self.experiment['experimental_groups_defaults']['applications_names'])}\nGPUs: {', '.join(self.experiment['experimental_groups_defaults']['gpus'])}""" + fig.canvas.manager.set_window_title(title) + if not save_figs: + fig.suptitle(title) + + # finalize the figure and save or display it + self.plot_strategies_aggregated( + axs[0], aggregation_data, plot_settings=self.experiment["visualization_settings"] + ) + fig.tight_layout() + if save_figs: + filename_path = Path(self.plot_filename_prefix) / "aggregated" + fig.savefig(filename_path, dpi=300) + print(f"Figure saved to {filename_path}") + else: + plt.show() def plot_baselines_comparison( self, @@ -646,6 +663,7 @@ def plot_split_times_bar_comparison( def plot_strategies( self, + style: str, x_type: str, y_type: str, ax: plt.Axes, @@ -661,6 +679,7 @@ def plot_strategies( """Plots all optimization strategies for individual search spaces. Args: + style: the style of plot, either 'line' or 'scatter'. x_type: the type of ``x_axis_range``. y_type: the type of plot on the y-axis. ax: the axis to plot on. 
@@ -690,7 +709,7 @@ def normalize_multiple(curves: list) -> tuple: return tuple(normalize(curve) for curve in curves) # plot the absolute optimum - absolute_optimum_y_value = absolute_optimum if y_type == "absolute" or y_type == "scatter" else 1 + absolute_optimum_y_value = absolute_optimum if y_type == "absolute" or style == "scatter" else 1 absolute_optimum_label = ( "Absolute optimum ({})".format(round(absolute_optimum, 3)) if y_type == "absolute" else "Absolute optimum" ) @@ -736,7 +755,7 @@ def normalize_multiple(curves: list) -> tuple: continue # get the plot data - if y_type == "scatter": + if style == "scatter": x_axis, y_axis = strategy_curve.get_scatter_data(x_type) ax.scatter(x_axis, y_axis, label=label, color=color) continue From ec8cfe7a981c476ffd24ee41267d44c925f7db3c Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 25 Oct 2024 18:08:59 -0700 Subject: [PATCH 034/234] Fixed milliseconds spelling error --- src/autotuning_methodology/experiments.py | 4 ++-- src/autotuning_methodology/runner.py | 4 ++-- src/autotuning_methodology/searchspace_statistics.py | 4 ++-- src/autotuning_methodology/visualize_experiments.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 84fda40..86e73f7 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -49,11 +49,11 @@ def make_and_check_path(filename: str, parent=None, extension=None) -> Path: return filename_path # try and add extension if extension is None: - raise FileNotFoundError(f"{filename_path} does not exist.") + raise FileNotFoundError(f"{filename_path.resolve()} does not exist.") filename_path = Path(str(filename_path) + extension) if filename_path.exists(): return filename_path - raise FileNotFoundError(f"{filename_path} does not exist.") + raise FileNotFoundError(f"{filename_path.resolve()} does not exist.") def get_experiment(filename: 
str) -> dict: diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index d8bec01..d2790f9 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -197,7 +197,7 @@ def tune( ValueError: if tuning fails multiple times in a row. Returns: - A tuple of the metadata, the results, and the total runtime in miliseconds. + A tuple of the metadata, the results, and the total runtime in milliseconds. """ def tune_with_kerneltuner_old(): @@ -537,7 +537,7 @@ def get_nan_array() -> np.ndarray: else: value = evaluation_times[key] if value is not None and not is_invalid_objective_time(value): - # value = value / 1000 # TODO this miliseconds to seconds conversion is specific to Kernel Tuner + # value = value / 1000 # TODO this milliseconds to seconds conversion is specific to Kernel Tuner objective_time_results_per_key[key_index, evaluation_index, repeat_index] = value objective_times_list.append(value) # sum the objective times of the keys diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index 9770b38..3685600 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -39,7 +39,7 @@ def convert_from_time_unit(value, from_unit: str): unit = from_unit.lower() if unit == "seconds" or unit == "s": return value - elif unit == "miliseconds" or unit == "ms": + elif unit == "milliseconds" or unit == "ms": return value / 1000 elif unit == "microseconds": return value / 1000000 @@ -219,7 +219,7 @@ def plot_histogram(self, cutoff_percentile: float): n_bins = 200 axs[0].hist(performances, bins=n_bins) axs[0].set_ylabel("Number of configurations in bin") - axs[0].set_xlabel("Performance in miliseconds") + axs[0].set_xlabel("Performance in milliseconds") axs[0].axvline(x=[mean], label="Mean", c="red") axs[0].axvline(x=[median], label="Median", c="orange") 
axs[0].axvline(x=[cutoff_performance], label="Cutoff point", c="green") diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 1056a84..bbc2f6c 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -114,7 +114,7 @@ class Visualize: "objective_baseline_max": "Improvement over random sampling", "aggregate_objective": "Aggregate best-found objective value relative to baseline", "aggregate_objective_max": "Aggregate improvement over random sampling", - "time": "Best-found kernel time in miliseconds", + "time": "Best-found kernel time in milliseconds", "GFLOP/s": "GFLOP/s", } ) From 5df77f3b7e44a14e1af3172f8863284fb3fb78f5 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 25 Oct 2024 18:09:26 -0700 Subject: [PATCH 035/234] Fixed milliseconds spelling error --- .gitignore | 1 + cached_data_used/cachefiles/ktt_values_to_kerneltuner.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 2b9c59e..b55dd8a 100755 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ cached_data_used/last_run/* cached_data_used/import_runs/* methodology_paper_evaluation/run/* hyperparametertuning/* +test_run_experiment/* # ignore setup files */setup/*_input.json diff --git a/cached_data_used/cachefiles/ktt_values_to_kerneltuner.py b/cached_data_used/cachefiles/ktt_values_to_kerneltuner.py index 5b3d27f..0a167db 100644 --- a/cached_data_used/cachefiles/ktt_values_to_kerneltuner.py +++ b/cached_data_used/cachefiles/ktt_values_to_kerneltuner.py @@ -1,7 +1,7 @@ """Script to overwrite Kernel Tuner brute forced cache files with the objective values of a KTT brute force search. Notes: this requires a fully bruteforced KTT and fully bruteforced KernelTuner (KT) cachefile on the same search space. -Objective value is assumed to be time by default. 
Time is assumed to be in microseconds for KTT and miliseconds for KT. +Objective value is assumed to be time by default. Time is assumed to be in microseconds for KTT and milliseconds for KT. """ import json From 76cf59bc1191dac38168ae3c6e4fb665458f2347 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 25 Oct 2024 18:09:58 -0700 Subject: [PATCH 036/234] Updated experiments file conversion to new experiments file format --- experiment_files/compare_hypertuners.json | 155 ++++++++++++++++++ experiment_files/convert_old_to_new_format.py | 17 +- 2 files changed, 167 insertions(+), 5 deletions(-) create mode 100644 experiment_files/compare_hypertuners.json diff --git a/experiment_files/compare_hypertuners.json b/experiment_files/compare_hypertuners.json new file mode 100644 index 0000000..2f5726e --- /dev/null +++ b/experiment_files/compare_hypertuners.json @@ -0,0 +1,155 @@ +{ + "version": "1.0.0", + "name": "Compare hyperparameter tuning", + "parent_folder": "./hyperparametertuning", + "experimental_groups_defaults": { + "applications": [ + { + "name": "convolution", + "folder": "./cached_data_used/kernels", + "input_file": "convolution.json" + }, + { + "name": "pnpoly", + "folder": "./cached_data_used/kernels", + "input_file": "pnpoly.json" + } + ], + "gpus": [ + "RTX_3090", + "RTX_2080_Ti" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./cached_data_used/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 50, + "samples": 32, + "minimum_number_of_valid_search_iterations": 20, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "pso_default", + "search_method": "pso", + "display_name": "PSO default", + "autotuner": "KernelTuner", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 20 + }, + { + "name": "maxiter", + "value": 100 + }, + { + "name": "w", + "value": 0.5 + }, + { + "name": "c1", + "value": 2.0 + }, + { + "name": "c2", + "value": 1.0 + } + ] + }, + { + 
"name": "pso_tuned", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 20 + }, + { + "name": "maxiter", + "value": 150 + }, + { + "name": "w", + "value": 0.25 + }, + { + "name": "c1", + "value": 3.0 + }, + { + "name": "c2", + "value": 1.5 + } + ], + "display_name": "PSO tuned", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.96, + "cutoff_percentile_start": 0.5, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "time" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "searchspaces" + ] + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ] + }, + { + "scope": "aggregate", + "style": "line" + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file diff --git a/experiment_files/convert_old_to_new_format.py b/experiment_files/convert_old_to_new_format.py index eb2654b..9877ecb 100644 --- a/experiment_files/convert_old_to_new_format.py +++ b/experiment_files/convert_old_to_new_format.py @@ -6,8 +6,8 @@ # set input and output files folderpath = Path(__file__).parent -old_file_path = folderpath / Path("methodology_paper_evaluation.json") -new_file_path = folderpath / Path("methodology_paper_evaluation_new.json") +old_file_path = folderpath / 
Path("../tests/autotuning_methodology/integration/mockfiles/test.json") +new_file_path = folderpath / Path("../tests/autotuning_methodology/integration/mockfiles/test_new.json") encoding = "utf-8" assert old_file_path.exists(), f"Old file does not exist at {old_file_path}" assert not new_file_path.exists(), f"New file does already exists at {new_file_path}" @@ -18,7 +18,7 @@ # convert the dictionary to the new format new_experiment = { - "version": "1.0.0", + "version": "1.1.0", "name": old_experiment["name"], "parent_folder": f"./{old_experiment['folder_id']}", "experimental_groups_defaults": { @@ -61,8 +61,15 @@ "objective_performance_keys": old_experiment["objective_performance_keys"], }, "visualization_settings": { - "x_axis_value_types": old_experiment["plot"]["plot_x_value_types"], - "y_axis_value_types": old_experiment["plot"]["plot_y_value_types"], + "plots": [ + { + "scope": "aggregate" if "aggregated" in plottype else "searchspace", + "style": "scatter" if "scatter" in plottype else "line", + "x_axis_value_types": [plottype if plottype != "aggregated" else "time"], + "y_axis_value_types": old_experiment["plot"]["plot_y_value_types"], + } + for plottype in old_experiment["plot"]["plot_x_value_types"] + ], "resolution": old_experiment["resolution"], "confidence_level": old_experiment["plot"]["confidence_level"], "compare_baselines": old_experiment["plot"]["compare_baselines"], From 9053a0e76607a714b236ee6e3bec1a817205e030 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 25 Oct 2024 18:13:58 -0700 Subject: [PATCH 037/234] Setup mock test in new experiments and T4 format --- .../integration/mockfiles/convolution.cu | 168 + .../integration/mockfiles/mock_gpu.json | 1 + .../integration/mockfiles/mock_gpu_T4.json | 272888 +++++++++++++++ .../mocktest_kernel_convolution.json | 146 + .../mockfiles/mocktest_kernel_convolution.py | 88 - .../integration/mockfiles/test.json | 124 +- 6 files changed, 273278 insertions(+), 137 deletions(-) create mode 
100644 tests/autotuning_methodology/integration/mockfiles/convolution.cu create mode 100644 tests/autotuning_methodology/integration/mockfiles/mock_gpu_T4.json create mode 100644 tests/autotuning_methodology/integration/mockfiles/mocktest_kernel_convolution.json delete mode 100644 tests/autotuning_methodology/integration/mockfiles/mocktest_kernel_convolution.py diff --git a/tests/autotuning_methodology/integration/mockfiles/convolution.cu b/tests/autotuning_methodology/integration/mockfiles/convolution.cu new file mode 100644 index 0000000..da84830 --- /dev/null +++ b/tests/autotuning_methodology/integration/mockfiles/convolution.cu @@ -0,0 +1,168 @@ +extern "C" { +#define image_height 4096 +#define image_width 4096 + +#ifndef filter_height + #define filter_height 17 +#endif +#ifndef filter_width + #define filter_width 17 +#endif + +#define border_height ((filter_height/2)*2) +#define border_width ((filter_width/2)*2) +#define input_height (image_height + border_height) +#define input_width (image_width + border_width) + +#ifndef block_size_x + #define block_size_x 16 +#endif +#ifndef block_size_y + #define block_size_y 16 +#endif +#ifndef block_size_z + #define block_size_z 1 +#endif +#ifndef tile_size_x + #define tile_size_x 1 +#endif +#ifndef tile_size_y + #define tile_size_y 1 +#endif + +#define i_end min(block_size_y*tile_size_y+border_height, input_height) +#define j_end min(block_size_x*tile_size_x+border_width, input_width) + +/* + * If requested, we can use the __ldg directive to load data through the + * read-only cache. 
+ */ +#define USE_READ_ONLY_CACHE read_only +#if USE_READ_ONLY_CACHE == 1 +#define LDG(x, y) __ldg(x+y) +#elif USE_READ_ONLY_CACHE == 0 +#define LDG(x, y) x[y] +#endif + +__constant__ float d_filter[33*33]; //large enough for the largest filter + +/* + * If use_padding == 1, we introduce (only when necessary) a number of padding + * columns in shared memory to avoid shared memory bank conflicts + * + * padding columns are only inserted when block_size_x is not a multiple of 32 (the assumed number of memory banks) + * and when the width of the data needed is not a multiple of 32. The latter is because some filter_widths never + * cause bank conflicts. + * + * If not passed as a tunable parameter, padding is on by default + */ +#define shared_mem_width (block_size_x*tile_size_x+border_width) +#ifndef use_padding + #define use_padding 1 +#endif +#if use_padding == 1 + #if (((block_size_x % 32)!=0) && (((shared_mem_width-block_size_x)%32) != 0)) + // next line uses &31 instead of %32, because % in C is remainder not modulo + #define padding_columns ((32 - (border_width + block_size_x*tile_size_x - block_size_x)) & 31) + #undef shared_mem_width + #define shared_mem_width (block_size_x*tile_size_x+border_width+padding_columns) + #endif +#endif + + +__global__ void convolution_kernel(float *output, float *input, float *filter) { + int ty = threadIdx.y; + int tx = threadIdx.x; + int by = blockIdx.y * block_size_y * tile_size_y; + int bx = blockIdx.x * block_size_x * tile_size_x; + + //shared memory to hold all input data need by this thread block + __shared__ float sh_input[block_size_y*tile_size_y+border_height][shared_mem_width]; + + //load all input data needed by this thread block into shared memory + #pragma unroll + for (int i=ty; i=64", + "Parameters": [ + "block_size_x", + "block_size_y" + ] + }, + { + "Expression": "block_size_x*block_size_y<=1024", + "Parameters": [ + "block_size_x", + "block_size_y" + ] + }, + { + "Expression": "tile_size_x*tile_size_y<30", + 
"Parameters": [ + "tile_size_x", + "tile_size_y" + ] + } + ] + }, + "KernelSpecification": { + "Language": "CUDA", + "CompilerOptions": [ + "-std=c++11" + ], + "BenchmarkName": "GEMM", + "KernelName": "convolution_kernel", + "KernelFile": "convolution.cu", + "GlobalSizeType": "CUDA", + "LocalSize": { + "X": "block_size_x", + "Y": "block_size_y", + "Z": "1" + }, + "GlobalSize": { + "X": "(262144 // block_size_x) // tile_size_x", + "Y": "(262144 // block_size_y) // tile_size_y", + "Z": "1" + }, + "GridDivX": [ + "block_size_x", + "tile_size_x" + ], + "GridDivY": [ + "block_size_y", + "tile_size_y" + ], + "ProblemSize": [ + 4096, + 4096 + ], + "SharedMemory": 0, + "Stream": null, + "Arguments": [ + { + "Name": "output_image", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "WriteOnly", + "FillType": "Constant", + "Size": "ProblemSize[0]*ProblemSize[1]", + "FillValue": 0.0, + "Output": 1 + }, + { + "Name": "input_image", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "ReadOnly", + "FillType": "Random", + "Size": "(ProblemSize[0]+max(filter_width)-1) * (ProblemSize[1]+max(filter_height)-1)", + "FillValue": 1.0 + }, + { + "Name": "d_filter", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "ReadOnly", + "MemType": "Constant", + "FillType": "Random", + "Size": "max(filter_height) * max(filter_width)", + "FillValue": 1.0 + } + ] + } +} \ No newline at end of file diff --git a/tests/autotuning_methodology/integration/mockfiles/mocktest_kernel_convolution.py b/tests/autotuning_methodology/integration/mockfiles/mocktest_kernel_convolution.py deleted file mode 100644 index f260be0..0000000 --- a/tests/autotuning_methodology/integration/mockfiles/mocktest_kernel_convolution.py +++ /dev/null @@ -1,88 +0,0 @@ -"""Test kernel to run integration tests.""" - -#!/usr/bin/env python -import sys - -import numpy - -import kernel_tuner -from kernel_tuner.file_utils import store_metadata_file, store_output_file - -file_path_prefix = 
"../../../../cached_data_used" -file_path_results = file_path_prefix + "/last_run/_tune_configuration-results.json" -file_path_metadata = file_path_prefix + "/last_run/_tune_configuration-metadata.json" - - -def tune(device_name: str, strategy="mls", strategy_options=None, verbose=True, quiet=False, simulation_mode=True): - # input dimensions and data - image_width = 4096 - image_height = 4096 - filter_width = 15 - filter_height = 15 - problem_size = (image_width, image_height) - size = numpy.prod(problem_size) - - input_size = (problem_size[0] + filter_width - 1) * (problem_size[1] + filter_height - 1) - output_image = numpy.zeros(size).astype(numpy.float32) - input_image = numpy.random.randn(input_size).astype(numpy.float32) - filter_weights = numpy.random.randn(filter_width * filter_height).astype(numpy.float32) - - cmem_args = {"d_filter": filter_weights} - args = [output_image, input_image, filter_weights] - - metrics = dict() - metrics["GFLOP/s"] = lambda p: (image_width * image_height * filter_width * filter_height * 2 / 1e9) / ( - p["time"] / 1e3 - ) - - # setup tunable parameters - tune_params = dict() - tune_params["block_size_x"] = [1, 2, 4, 8, 16, 32, 48, 64, 96, 112, 128] - tune_params["block_size_y"] = [1, 2, 4, 8, 16, 32] - tune_params["filter_height"] = [filter_height] - tune_params["filter_width"] = [filter_width] - tune_params["read_only"] = [0, 1] - tune_params["tile_size_x"] = [1, 2, 3, 4, 5, 6, 7, 8] - tune_params["tile_size_y"] = [1, 2, 3, 4, 5, 6, 7, 8] - tune_params["use_padding"] = [0, 1] - - restrict = ["block_size_x*block_size_y>=64", "block_size_x*block_size_y<=1024", "tile_size_x*tile_size_y<30"] - - grid_div_x = ["block_size_x", "tile_size_x"] - grid_div_y = ["block_size_y", "tile_size_y"] - - # start tuning - results, env = kernel_tuner.tune_kernel( - "convolution_kernel", - "convolution.cu", - problem_size, - args, - tune_params, - grid_div_y=grid_div_y, - grid_div_x=grid_div_x, - cmem_args=cmem_args, - restrictions=restrict, - 
cache=file_path_prefix + "/cachefiles/mocktest_kernel_convolution/" + device_name.lower(), - metrics=metrics, - iterations=32, - device=0, - verbose=verbose, - quiet=quiet, - strategy=strategy, - strategy_options=strategy_options, - simulation_mode=simulation_mode, - ) - - store_output_file(file_path_results, results, tune_params) - store_metadata_file(file_path_metadata) - return results, env - - -if __name__ == "__main__": - if len(sys.argv) != 2: - print("Usage: ./mocktest_kernel_convolution.py [device name]") - exit(1) - - device_name = sys.argv[1] - - tune(device_name) diff --git a/tests/autotuning_methodology/integration/mockfiles/test.json b/tests/autotuning_methodology/integration/mockfiles/test.json index 9b11d8b..3fcac7d 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test.json +++ b/tests/autotuning_methodology/integration/mockfiles/test.json @@ -1,60 +1,86 @@ { - "version": "0.1.2", + "version": "1.1.0", "name": "Mock run test", - "folder_id": "test_run_experiment", - "kernels_path": "../mockfiles", - "bruteforced_caches_path": "../../../../cached_data_used/cachefiles", - "visualization_caches_path": "../../../../cached_data_used/visualizations", - "kernels": [ - "mocktest_kernel_convolution" - ], - "GPUs": [ - "mock_GPU" - ], - "minimization": true, - "resolution": 1e3, - "cutoff_percentile": 0.99, - "cutoff_percentile_start": 0.7, - "objective_time_keys": [ - "compilation", - "benchmark", - "framework", - "search_algorithm", - "validation" - ], - "objective_performance_keys": [ - "time" - ], - "cutoff_type": "fevals", - "plot": { - "plot_x_value_types": [ - "fevals", - "time", - "aggregated" + "parent_folder": "./test_run_experiment", + "experimental_groups_defaults": { + "applications": [ + { + "name": "mocktest_kernel_convolution", + "input_file": "mocktest_kernel_convolution.json", + "folder": "./tests/autotuning_methodology/integration/mockfiles" + } ], - "plot_y_value_types": [ - "normalized", - "baseline" + "gpus": [ + "mock_GPU" 
], - "confidence_level": 0.95, - "compare_baselines": true, - "compare_split_times": true - }, - "strategy_defaults": { - "repeats": 10, - "minimum_number_of_evaluations": 20, + "pattern_for_full_search_space_filenames": { + "regex": "./tests/autotuning_methodology/integration/mockfiles/mock_gpu_T4.json" + }, "stochastic": true, - "record_data": [ - "time", - "GFLOP/s" - ] + "repeats": 10, + "samples": 3, + "minimum_number_of_valid_search_iterations": 20, + "ignore_cache": false }, - "strategies": [ + "search_strategies": [ { "name": "random_sample_10_iter", - "strategy": "random_sample", + "search_method": "random_sample", "display_name": "Random sampling 10 iters", - "repeats": 3 + "autotuner": "KernelTuner" } - ] + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.99, + "cutoff_percentile_start": 0.7, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "time" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "aggregate", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": true, + "compare_split_times": true + } } \ No newline at end of file From 3d0fc664b334a021dffd0525efdeaa4f1932eaa3 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 25 Oct 2024 18:52:46 -0700 Subject: [PATCH 038/234] Converted integration test files to new experiments file format --- experiment_files/convert_old_to_new_format.py | 9 +-- .../mockfiles/test_bad_kernel_path.json | 59 +------------------ 
.../integration/mockfiles/test_cached.json | 59 +------------------ .../mockfiles/test_import_runs.json | 59 +------------------ 4 files changed, 8 insertions(+), 178 deletions(-) diff --git a/experiment_files/convert_old_to_new_format.py b/experiment_files/convert_old_to_new_format.py index 9877ecb..c054dac 100644 --- a/experiment_files/convert_old_to_new_format.py +++ b/experiment_files/convert_old_to_new_format.py @@ -6,8 +6,8 @@ # set input and output files folderpath = Path(__file__).parent -old_file_path = folderpath / Path("../tests/autotuning_methodology/integration/mockfiles/test.json") -new_file_path = folderpath / Path("../tests/autotuning_methodology/integration/mockfiles/test_new.json") +old_file_path = folderpath / Path("../tests/autotuning_methodology/integration/mockfiles/test_import_runs.json") +new_file_path = folderpath / Path("../tests/autotuning_methodology/integration/mockfiles/test_import_runs_new.json") encoding = "utf-8" assert old_file_path.exists(), f"Old file does not exist at {old_file_path}" assert not new_file_path.exists(), f"New file does already exists at {new_file_path}" @@ -47,10 +47,11 @@ "name": strategy["name"], "search_method": strategy["strategy"], "display_name": strategy["display_name"], - "autotuner": "KernelTuner", # Assuming autotuner is KernelTuner for all strategies + "autotuner": ( + "KernelTuner" if strategy["name"] != "ktt_profile_searcher" else "KTT" + ), # Assuming autotuner is KernelTuner for all strategies } for strategy in old_experiment["strategies"] - if strategy["name"] != "ktt_profile_searcher" ], "statistics_settings": { "minimization": old_experiment["minimization"], diff --git a/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json b/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json index dcf3554..bb0bbd7 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json +++ 
b/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json @@ -1,58 +1 @@ -{ - "version": "0.1.2", - "name": "Test output file writer", - "folder_id": "test_output_file_writer", - "kernels_path": "cached_data_used/bogus_kernels_path", - "bruteforced_caches_path": "../../../../cached_data_used/cachefiles", - "visualization_caches_path": "../../../../cached_data_used/visualizations", - "kernels": [ - "convolution" - ], - "GPUs": [ - "RTX_2080_Ti" - ], - "minimization": true, - "resolution": 1e3, - "cutoff_percentile": 0.99, - "cutoff_percentile_start": 0.7, - "objective_time_keys": [ - "compilation", - "benchmark", - "framework", - "search_algorithm", - "validation" - ], - "objective_performance_keys": [ - "time" - ], - "cutoff_type": "fevals", - "plot": { - "plot_x_value_types": [ - "aggregated" - ], - "plot_y_value_types": [ - "normalized", - "baseline" - ], - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - }, - "strategy_defaults": { - "repeats": 100, - "minimum_number_of_evaluations": 20, - "stochastic": true, - "record_data": [ - "time", - "GFLOP/s" - ] - }, - "strategies": [ - { - "name": "random_sample_100_iter", - "strategy": "random_sample", - "display_name": "Random sampling 100 iters", - "repeats": 5 - } - ] -} \ No newline at end of file +{"version": "1.1.0", "name": "Test output file writer", "parent_folder": "./test_output_file_writer", "experimental_groups_defaults": {"applications": [{"name": "convolution", "input_file": "cached_data_used/bogus_kernels_path/convolution", "folder": "../../../../cached_data_used/visualizations/convolution"}], "gpus": ["RTX_2080_Ti"], "pattern_for_full_search_space_filenames": {"regex": "../../../../cached_data_used/cachefiles/${applications}/${gpus}.json"}, "stochastic": true, "repeats": 100, "samples": 5, "minimum_number_of_valid_search_iterations": 20, "ignore_cache": false}, "search_strategies": [{"name": "random_sample_100_iter", "search_method": 
"random_sample", "display_name": "Random sampling 100 iters", "autotuner": "KernelTuner"}], "statistics_settings": {"minimization": true, "cutoff_percentile": 0.99, "cutoff_percentile_start": 0.7, "cutoff_type": "fevals", "objective_time_keys": ["all"], "objective_performance_keys": ["time"]}, "visualization_settings": {"plots": [{"scope": "aggregate", "style": "line", "x_axis_value_types": ["time"], "y_axis_value_types": ["normalized", "baseline"]}], "resolution": 1000.0, "confidence_level": 0.95, "compare_baselines": false, "compare_split_times": false}} \ No newline at end of file diff --git a/tests/autotuning_methodology/integration/mockfiles/test_cached.json b/tests/autotuning_methodology/integration/mockfiles/test_cached.json index 1821090..36c6250 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test_cached.json +++ b/tests/autotuning_methodology/integration/mockfiles/test_cached.json @@ -1,58 +1 @@ -{ - "version": "0.1.2", - "name": "Test output file writer", - "folder_id": "test_output_file_writer", - "kernels_path": "cached_data_used/kernels", - "bruteforced_caches_path": "../../../../cached_data_used/cachefiles", - "visualization_caches_path": "../../../../cached_data_used/visualizations", - "kernels": [ - "convolution" - ], - "GPUs": [ - "RTX_2080_Ti" - ], - "minimization": true, - "resolution": 1e3, - "cutoff_percentile": 0.99, - "cutoff_percentile_start": 0.7, - "objective_time_keys": [ - "compilation", - "benchmark", - "framework", - "search_algorithm", - "validation" - ], - "objective_performance_keys": [ - "time" - ], - "cutoff_type": "fevals", - "plot": { - "plot_x_value_types": [ - "aggregated" - ], - "plot_y_value_types": [ - "normalized", - "baseline" - ], - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - }, - "strategy_defaults": { - "repeats": 100, - "minimum_number_of_evaluations": 20, - "stochastic": true, - "record_data": [ - "time", - "GFLOP/s" - ] - }, - "strategies": [ - { - 
"name": "random_sample_100_iter", - "strategy": "random_sample", - "display_name": "Random sampling 100 iters", - "repeats": 5 - } - ] -} \ No newline at end of file +{"version": "1.1.0", "name": "Test output file writer", "parent_folder": "./test_output_file_writer", "experimental_groups_defaults": {"applications": [{"name": "convolution", "input_file": "cached_data_used/kernels/convolution", "folder": "../../../../cached_data_used/visualizations/convolution"}], "gpus": ["RTX_2080_Ti"], "pattern_for_full_search_space_filenames": {"regex": "../../../../cached_data_used/cachefiles/${applications}/${gpus}.json"}, "stochastic": true, "repeats": 100, "samples": 5, "minimum_number_of_valid_search_iterations": 20, "ignore_cache": false}, "search_strategies": [{"name": "random_sample_100_iter", "search_method": "random_sample", "display_name": "Random sampling 100 iters", "autotuner": "KernelTuner"}], "statistics_settings": {"minimization": true, "cutoff_percentile": 0.99, "cutoff_percentile_start": 0.7, "cutoff_type": "fevals", "objective_time_keys": ["all"], "objective_performance_keys": ["time"]}, "visualization_settings": {"plots": [{"scope": "aggregate", "style": "line", "x_axis_value_types": ["time"], "y_axis_value_types": ["normalized", "baseline"]}], "resolution": 1000.0, "confidence_level": 0.95, "compare_baselines": false, "compare_split_times": false}} \ No newline at end of file diff --git a/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json b/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json index 02fcc88..cf695f7 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json +++ b/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json @@ -1,58 +1 @@ -{ - "version": "0.1.2", - "name": "Test import runs", - "folder_id": "test_output_file_writer", - "kernels_path": "../mockfiles", - "bruteforced_caches_path": "../../../../cached_data_used/cachefiles", - 
"visualization_caches_path": "../../../../cached_data_used/visualizations", - "kernels": [ - "mocktest_kernel_convolution" - ], - "GPUs": [ - "mock_GPU" - ], - "minimization": true, - "resolution": 1e3, - "cutoff_percentile": 0.99, - "cutoff_percentile_start": 0.7, - "objective_time_keys": [ - "compilation", - "benchmark", - "framework", - "search_algorithm", - "validation" - ], - "objective_performance_keys": [ - "time" - ], - "cutoff_type": "fevals", - "plot": { - "plot_x_value_types": [ - "aggregated" - ], - "plot_y_value_types": [ - "normalized", - "baseline" - ], - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - }, - "strategy_defaults": { - "iterations": 32, - "repeats": 2, - "minimum_number_of_evaluations": 20, - "stochastic": true, - "record_data": [ - "time", - "GFLOP/s" - ] - }, - "strategies": [ - { - "name": "ktt_profile_searcher", - "strategy": "profile_searcher", - "display_name": "KTT Profile Searcher" - } - ] -} \ No newline at end of file +{"version": "1.1.0", "name": "Test import runs", "parent_folder": "./test_output_file_writer", "experimental_groups_defaults": {"applications": [{"name": "mocktest_kernel_convolution", "input_file": "../mockfiles/mocktest_kernel_convolution", "folder": "../../../../cached_data_used/visualizations/mocktest_kernel_convolution"}], "gpus": ["mock_GPU"], "pattern_for_full_search_space_filenames": {"regex": "../../../../cached_data_used/cachefiles/${applications}/${gpus}.json"}, "stochastic": true, "repeats": 2, "samples": 32, "minimum_number_of_valid_search_iterations": 20, "ignore_cache": false}, "search_strategies": [{"name": "ktt_profile_searcher", "search_method": "profile_searcher", "display_name": "KTT Profile Searcher", "autotuner": "KTT"}], "statistics_settings": {"minimization": true, "cutoff_percentile": 0.99, "cutoff_percentile_start": 0.7, "cutoff_type": "fevals", "objective_time_keys": ["all"], "objective_performance_keys": ["time"]}, "visualization_settings": 
{"plots": [{"scope": "aggregate", "style": "line", "x_axis_value_types": ["time"], "y_axis_value_types": ["normalized", "baseline"]}], "resolution": 1000.0, "confidence_level": 0.95, "compare_baselines": false, "compare_split_times": false}} \ No newline at end of file From 2de6060602e7c469de280b2398a55624e059d294 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Sat, 26 Oct 2024 15:39:16 -0700 Subject: [PATCH 039/234] Adjusted the tests for execute_experiments in accordance with new interface --- src/autotuning_methodology/experiments.py | 4 +- .../integration/test_run_experiment.py | 53 +++++++++++++------ 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 86e73f7..193cae8 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -428,7 +428,7 @@ def generate_experiment_file( return experiment_file_path.resolve() -def execute_experiment(filepath: str, profiling: bool = False) -> tuple[dict, dict, dict]: +def execute_experiment(filepath: str, profiling: bool = False) -> tuple[dict, list, dict, dict]: """Executes the experiment by retrieving it from the cache or running it. Args: @@ -439,7 +439,7 @@ def execute_experiment(filepath: str, profiling: bool = False) -> tuple[dict, di FileNotFoundError: if the path to the kernel specified in the experiments file is not found. Returns: - A tuple of the experiment dictionary, the experimental groups executed, and the resulting list of ``ResultsDescription``. + A tuple of the experiment dictionary, the experimental groups executed, the dictionary of ``Searchspace statistics`` and the resulting list of ``ResultsDescription``. 
""" experiment = get_experiment(filepath) experiment_folderpath = Path(experiment["parent_folder"]) diff --git a/tests/autotuning_methodology/integration/test_run_experiment.py b/tests/autotuning_methodology/integration/test_run_experiment.py index 79d3266..c6db8ce 100644 --- a/tests/autotuning_methodology/integration/test_run_experiment.py +++ b/tests/autotuning_methodology/integration/test_run_experiment.py @@ -39,7 +39,7 @@ def _remove_dir(path: Path): """Utility function for removing a directory and the contained files.""" - assert path.exists() + assert path.exists(), f"Path to directory does not exist: {path.resolve()}" for sub in path.iterdir(): sub.unlink() path.rmdir() @@ -116,7 +116,7 @@ def test_bad_experiment(): def test_run_experiment_bad_kernel_path(): """Run an experiment with a bad kernel path.""" experiment_filepath = str(mockfiles_path / "test_bad_kernel_path.json") - with pytest.raises(FileNotFoundError, match="No such path"): + with pytest.raises(FileNotFoundError, match="does not exist"): execute_experiment(experiment_filepath, profiling=False) @@ -127,8 +127,10 @@ def test_run_experiment(): if cached_visualization_file.exists(): cached_visualization_file.unlink() assert not cached_visualization_file.exists() - (experiment, strategies, results_descriptions) = execute_experiment(str(experiment_filepath_test), profiling=False) - validate_experiment_results(experiment, strategies, results_descriptions) + (experiment, all_experimental_groups, searchspace_statistics, results_descriptions) = execute_experiment( + str(experiment_filepath_test), profiling=False + ) + validate_experiment_results(experiment, all_experimental_groups, searchspace_statistics, results_descriptions) @pytest.mark.usefixtures("test_run_experiment") @@ -138,26 +140,30 @@ def test_cached_experiment(): assert normal_cachefile_destination.exists() assert cached_visualization_path.exists() assert cached_visualization_file.exists() - (experiment, strategies, results_descriptions) = 
execute_experiment(str(experiment_filepath_test), profiling=False) - validate_experiment_results(experiment, strategies, results_descriptions) + (experiment, all_experimental_groups, searchspace_statistics, results_descriptions) = execute_experiment( + str(experiment_filepath_test), profiling=False + ) + validate_experiment_results(experiment, all_experimental_groups, searchspace_statistics, results_descriptions) def test_import_run_experiment(): """Import runs from an experiment.""" assert import_runs_path.exists() - (experiment, strategies, results_descriptions) = execute_experiment( + (experiment, all_experimental_groups, searchspace_statistics, results_descriptions) = execute_experiment( str(experiment_import_filepath_test), profiling=False ) assert cached_visualization_imported_path.exists() assert cached_visualization_imported_file.exists() - validate_experiment_results(experiment, strategies, results_descriptions) + validate_experiment_results(experiment, all_experimental_groups, searchspace_statistics, results_descriptions) @pytest.mark.usefixtures("test_run_experiment") def test_curve_instance(): """Test a Curve instance.""" # setup the test - (experiment, strategies, results_descriptions) = execute_experiment(str(experiment_filepath_test), profiling=False) + (experiment, _, strategies, results_descriptions) = execute_experiment( + str(experiment_filepath_test), profiling=False + ) kernel_name = experiment["kernels"][0] gpu_name = experiment["GPUs"][0] strategy_name = strategies[0]["name"] @@ -188,18 +194,31 @@ def test_curve_instance(): def validate_experiment_results( experiment, - strategies, + all_experimental_groups, + searchspace_statistics, results_descriptions, ): """Validate the types and contents returned from an experiment.""" - assert isinstance(experiment, dict) - assert isinstance(strategies, list) - assert isinstance(results_descriptions, dict) + assert isinstance(experiment, dict), f"should be dict, is {type(experiment)} ({experiment})" + 
assert isinstance( + searchspace_statistics, dict + ), f"should be dict, is {type(searchspace_statistics)} ({searchspace_statistics})" + assert isinstance( + all_experimental_groups, list + ), f"should be list, is {type(all_experimental_groups)} ({all_experimental_groups})" + assert isinstance( + results_descriptions, dict + ), f"should be dict, is {type(results_descriptions)} ({results_descriptions})" # validate the contents validate_experimentsfile(experiment) - kernel_name = experiment["kernels"][0] - gpu_name = experiment["GPUs"][0] - assert len(strategies) == 1 - strategy_name = strategies[0]["name"] + experimental_groups: dict = experiment["experimental_groups_defaults"] + assert isinstance(experimental_groups, dict) + kernel_name = experimental_groups["applications"][0]["name"] + assert kernel_name == "mocktest_kernel_convolution" + gpu_name = experimental_groups["gpus"][0] + assert gpu_name == "mock_GPU" + assert len(all_experimental_groups) == 1 + strategy_name = all_experimental_groups[0]["name"] + assert strategy_name == "random_sample_10_iter" assert isinstance(results_descriptions[gpu_name][kernel_name][strategy_name], ResultsDescription) From f774b704f4fe5cde3b534ea392eed2d638d66ff9 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Sat, 26 Oct 2024 16:47:46 -0700 Subject: [PATCH 040/234] Adjusted integration visualization test to work with new directory structure --- src/autotuning_methodology/visualize_experiments.py | 2 +- .../integration/test_run_experiment.py | 11 ++++++++--- .../integration/test_visualization.py | 8 +++++--- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index bbc2f6c..5bda71d 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -548,7 +548,7 @@ def plot_split_times_comparison( # write to file or show if save_fig: - filename_path 
= Path(self.plot_filename_prefix) / "{title}_split_times_{x_type}".replace(" ", "_") + filename_path = Path(self.plot_filename_prefix) / f"{title}_split_times_{x_type}".replace(" ", "_") plt.savefig(filename_path, dpi=300) print(f"Figure saved to {filename_path}") else: diff --git a/tests/autotuning_methodology/integration/test_run_experiment.py b/tests/autotuning_methodology/integration/test_run_experiment.py index c6db8ce..6a5f37c 100644 --- a/tests/autotuning_methodology/integration/test_run_experiment.py +++ b/tests/autotuning_methodology/integration/test_run_experiment.py @@ -13,17 +13,22 @@ # get the path to the package package_path = Path(files("autotuning_methodology")).parent.parent -# package_path = "" # setup file paths +strategy = "random_sample_10_iter" mockfiles_path_root = package_path / Path("tests/autotuning_methodology/integration/mockfiles/") mockfiles_path_source = mockfiles_path_root / "mock_gpu.json" mockfiles_path = mockfiles_path_root experiment_filepath_test = mockfiles_path / "test.json" assert experiment_filepath_test.exists() kernel_id = "mocktest_kernel_convolution" -cached_visualization_path = package_path / Path(f"cached_data_used/visualizations/test_run_experiment/{kernel_id}") -cached_visualization_file = cached_visualization_path / "mock_GPU_random_sample_10_iter.npz" +experiment_path = package_path / Path("test_run_experiment") +experiment_path_run = experiment_path / "run" +experiment_path_setup = experiment_path / "setup" + +cached_visualization_path = experiment_path_run / "generated_graphs" +plot_path = cached_visualization_path / strategy +cached_visualization_file = experiment_path_run / strategy / "mock_GPU_mocktest_kernel_convolution.npz" cached_visualization_imported_path = package_path / Path( f"cached_data_used/visualizations/test_output_file_writer/{kernel_id}" ) diff --git a/tests/autotuning_methodology/integration/test_visualization.py b/tests/autotuning_methodology/integration/test_visualization.py index 
97e34dc..ccdf91d 100644 --- a/tests/autotuning_methodology/integration/test_visualization.py +++ b/tests/autotuning_methodology/integration/test_visualization.py @@ -17,7 +17,7 @@ # setup file paths experiment_title = f"{kernel_id}_on_mock_GPU" -plot_path = Path("generated_plots/test_run_experiment") +plot_path = cached_visualization_path plot_path_fevals = plot_path / f"{experiment_title}_fevals.png" plot_path_time = plot_path / f"{experiment_title}_time.png" plot_path_aggregated = plot_path / "aggregated.png" @@ -25,7 +25,7 @@ plot_path_split_times_time = plot_path / f"{experiment_title}_split_times_time.png" plot_path_split_times_bar = plot_path / f"{experiment_title}_split_times_bar.png" plot_path_baselines_comparison = plot_path / f"{experiment_title}_baselines.png" -plot_filepaths = [ +plot_filepaths: list[Path] = [ plot_path_fevals, plot_path_time, plot_path_aggregated, @@ -78,4 +78,6 @@ def test_visualize_experiment(): compare_extra_baselines=True, ) for plot_filepath in plot_filepaths: - assert plot_filepath.exists(), f"{plot_filepath} does not exist" + assert ( + plot_filepath.exists() + ), f"{plot_filepath} does not exist, files in folder: {[f.name for f in plot_filepath.parent.iterdir() if f.is_file()]}" From 493616863f3836f3fdfa152e3f5c9cb11f19cc3d Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Sat, 26 Oct 2024 18:25:47 -0700 Subject: [PATCH 041/234] Adjusted integration tests to work with new directory structure --- .../integration/test_run_experiment.py | 30 ++++++++++++------- .../integration/test_visualization.py | 12 ++------ 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/tests/autotuning_methodology/integration/test_run_experiment.py b/tests/autotuning_methodology/integration/test_run_experiment.py index 6a5f37c..f6e4d5a 100644 --- a/tests/autotuning_methodology/integration/test_run_experiment.py +++ b/tests/autotuning_methodology/integration/test_run_experiment.py @@ -3,6 +3,7 @@ from importlib.resources import files 
from pathlib import Path from shutil import copyfile +from warnings import warn import numpy as np import pytest @@ -26,8 +27,8 @@ experiment_path_run = experiment_path / "run" experiment_path_setup = experiment_path / "setup" -cached_visualization_path = experiment_path_run / "generated_graphs" -plot_path = cached_visualization_path / strategy +cached_visualization_path = experiment_path_run +plot_path = cached_visualization_path / "generated_graphs" cached_visualization_file = experiment_path_run / strategy / "mock_GPU_mocktest_kernel_convolution.npz" cached_visualization_imported_path = package_path / Path( f"cached_data_used/visualizations/test_output_file_writer/{kernel_id}" @@ -42,12 +43,24 @@ import_runs_filepaths: list[Path] = list() -def _remove_dir(path: Path): +def _remove_dir(path: Path, ignore_permission_error=False): """Utility function for removing a directory and the contained files.""" assert path.exists(), f"Path to directory does not exist: {path.resolve()}" + permission_errors = [] for sub in path.iterdir(): - sub.unlink() - path.rmdir() + try: + if sub.is_dir(): + _remove_dir(sub) + else: + sub.unlink() + except PermissionError as e: + if ignore_permission_error: + warn(e) + permission_errors.append(e) + else: + raise e + if not (ignore_permission_error and len(permission_errors) > 0): + path.rmdir() def setup_module(): @@ -57,9 +70,6 @@ def setup_module(): assert normal_cachefiles_path.exists() normal_cachefile_destination.write_text(mockfiles_path_source.read_text()) assert normal_cachefile_destination.exists() - # cached_visualization_path.mkdir(parents=True, exist_ok=True) - # assert cached_visualization_path.exists() - # copy the import run test files to the import run folder assert import_runs_source_path.exists() import_runs_path.mkdir(parents=True, exist_ok=True) assert import_runs_path.exists() @@ -78,15 +88,13 @@ def teardown_module(): if normal_cachefile_destination.exists(): normal_cachefile_destination.unlink() 
_remove_dir(normal_cachefiles_path) - if cached_visualization_file.exists(): - cached_visualization_file.unlink() - _remove_dir(cached_visualization_path) if cached_visualization_imported_file.exists(): cached_visualization_imported_file.unlink() _remove_dir(cached_visualization_imported_path) # delete the import run test files from the import run folder for import_run_file in import_runs_filepaths: import_run_file.unlink() + _remove_dir(experiment_path) def test_CLI_input(): diff --git a/tests/autotuning_methodology/integration/test_visualization.py b/tests/autotuning_methodology/integration/test_visualization.py index ccdf91d..930bbe8 100644 --- a/tests/autotuning_methodology/integration/test_visualization.py +++ b/tests/autotuning_methodology/integration/test_visualization.py @@ -5,19 +5,19 @@ from test_run_experiment import ( _remove_dir, cached_visualization_file, - cached_visualization_path, experiment_filepath_test, + experiment_path, kernel_id, mockfiles_path_source, normal_cachefile_destination, normal_cachefiles_path, + plot_path, ) from autotuning_methodology.visualize_experiments import Visualize # setup file paths experiment_title = f"{kernel_id}_on_mock_GPU" -plot_path = cached_visualization_path plot_path_fevals = plot_path / f"{experiment_title}_fevals.png" plot_path_time = plot_path / f"{experiment_title}_time.png" plot_path_aggregated = plot_path / "aggregated.png" @@ -43,10 +43,6 @@ def setup_module(): assert normal_cachefiles_path.exists() normal_cachefile_destination.write_text(mockfiles_path_source.read_text()) assert normal_cachefile_destination.exists() - # cached_visualization_path.mkdir(parents=True, exist_ok=True) - # assert cached_visualization_path.exists() - # plot_path.mkdir(parents=True, exist_ok=True) - # assert plot_path.exists() def teardown_module(): @@ -54,13 +50,11 @@ def teardown_module(): if normal_cachefile_destination.exists(): normal_cachefile_destination.unlink() _remove_dir(normal_cachefiles_path) - if 
cached_visualization_file.exists(): - cached_visualization_file.unlink() - _remove_dir(cached_visualization_path) if plot_path.exists(): for plot_filepath in plot_filepaths: plot_filepath.unlink(missing_ok=True) plot_path.rmdir() + _remove_dir(experiment_path) def test_visualize_experiment(): From d4098e6a2b2d1cb85a519c13c0bd0050d81991f4 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Sat, 26 Oct 2024 18:32:16 -0700 Subject: [PATCH 042/234] Formated experiment files for readability --- .../mockfiles/test_bad_kernel_path.json | 65 ++++++++++++++++++- .../integration/mockfiles/test_cached.json | 65 ++++++++++++++++++- .../mockfiles/test_import_runs.json | 65 ++++++++++++++++++- 3 files changed, 192 insertions(+), 3 deletions(-) diff --git a/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json b/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json index bb0bbd7..e6c7919 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json +++ b/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json @@ -1 +1,64 @@ -{"version": "1.1.0", "name": "Test output file writer", "parent_folder": "./test_output_file_writer", "experimental_groups_defaults": {"applications": [{"name": "convolution", "input_file": "cached_data_used/bogus_kernels_path/convolution", "folder": "../../../../cached_data_used/visualizations/convolution"}], "gpus": ["RTX_2080_Ti"], "pattern_for_full_search_space_filenames": {"regex": "../../../../cached_data_used/cachefiles/${applications}/${gpus}.json"}, "stochastic": true, "repeats": 100, "samples": 5, "minimum_number_of_valid_search_iterations": 20, "ignore_cache": false}, "search_strategies": [{"name": "random_sample_100_iter", "search_method": "random_sample", "display_name": "Random sampling 100 iters", "autotuner": "KernelTuner"}], "statistics_settings": {"minimization": true, "cutoff_percentile": 0.99, "cutoff_percentile_start": 0.7, "cutoff_type": 
"fevals", "objective_time_keys": ["all"], "objective_performance_keys": ["time"]}, "visualization_settings": {"plots": [{"scope": "aggregate", "style": "line", "x_axis_value_types": ["time"], "y_axis_value_types": ["normalized", "baseline"]}], "resolution": 1000.0, "confidence_level": 0.95, "compare_baselines": false, "compare_split_times": false}} \ No newline at end of file +{ + "version": "1.1.0", + "name": "Test output file writer", + "parent_folder": "./test_output_file_writer", + "experimental_groups_defaults": { + "applications": [ + { + "name": "convolution", + "input_file": "cached_data_used/bogus_kernels_path/convolution", + "folder": "../../../../cached_data_used/visualizations/convolution" + } + ], + "gpus": [ + "RTX_2080_Ti" + ], + "pattern_for_full_search_space_filenames": { + "regex": "../../../../cached_data_used/cachefiles/${applications}/${gpus}.json" + }, + "stochastic": true, + "repeats": 100, + "samples": 5, + "minimum_number_of_valid_search_iterations": 20, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "random_sample_100_iter", + "search_method": "random_sample", + "display_name": "Random sampling 100 iters", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.99, + "cutoff_percentile_start": 0.7, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "time" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "aggregate", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file diff --git a/tests/autotuning_methodology/integration/mockfiles/test_cached.json b/tests/autotuning_methodology/integration/mockfiles/test_cached.json index 36c6250..efba3ee 100644 --- 
a/tests/autotuning_methodology/integration/mockfiles/test_cached.json +++ b/tests/autotuning_methodology/integration/mockfiles/test_cached.json @@ -1 +1,64 @@ -{"version": "1.1.0", "name": "Test output file writer", "parent_folder": "./test_output_file_writer", "experimental_groups_defaults": {"applications": [{"name": "convolution", "input_file": "cached_data_used/kernels/convolution", "folder": "../../../../cached_data_used/visualizations/convolution"}], "gpus": ["RTX_2080_Ti"], "pattern_for_full_search_space_filenames": {"regex": "../../../../cached_data_used/cachefiles/${applications}/${gpus}.json"}, "stochastic": true, "repeats": 100, "samples": 5, "minimum_number_of_valid_search_iterations": 20, "ignore_cache": false}, "search_strategies": [{"name": "random_sample_100_iter", "search_method": "random_sample", "display_name": "Random sampling 100 iters", "autotuner": "KernelTuner"}], "statistics_settings": {"minimization": true, "cutoff_percentile": 0.99, "cutoff_percentile_start": 0.7, "cutoff_type": "fevals", "objective_time_keys": ["all"], "objective_performance_keys": ["time"]}, "visualization_settings": {"plots": [{"scope": "aggregate", "style": "line", "x_axis_value_types": ["time"], "y_axis_value_types": ["normalized", "baseline"]}], "resolution": 1000.0, "confidence_level": 0.95, "compare_baselines": false, "compare_split_times": false}} \ No newline at end of file +{ + "version": "1.1.0", + "name": "Test output file writer", + "parent_folder": "./test_output_file_writer", + "experimental_groups_defaults": { + "applications": [ + { + "name": "convolution", + "input_file": "cached_data_used/kernels/convolution", + "folder": "../../../../cached_data_used/visualizations/convolution" + } + ], + "gpus": [ + "RTX_2080_Ti" + ], + "pattern_for_full_search_space_filenames": { + "regex": "../../../../cached_data_used/cachefiles/${applications}/${gpus}.json" + }, + "stochastic": true, + "repeats": 100, + "samples": 5, + "minimum_number_of_valid_search_iterations": 
20, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "random_sample_100_iter", + "search_method": "random_sample", + "display_name": "Random sampling 100 iters", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.99, + "cutoff_percentile_start": 0.7, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "time" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "aggregate", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file diff --git a/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json b/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json index cf695f7..9e5a5fe 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json +++ b/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json @@ -1 +1,64 @@ -{"version": "1.1.0", "name": "Test import runs", "parent_folder": "./test_output_file_writer", "experimental_groups_defaults": {"applications": [{"name": "mocktest_kernel_convolution", "input_file": "../mockfiles/mocktest_kernel_convolution", "folder": "../../../../cached_data_used/visualizations/mocktest_kernel_convolution"}], "gpus": ["mock_GPU"], "pattern_for_full_search_space_filenames": {"regex": "../../../../cached_data_used/cachefiles/${applications}/${gpus}.json"}, "stochastic": true, "repeats": 2, "samples": 32, "minimum_number_of_valid_search_iterations": 20, "ignore_cache": false}, "search_strategies": [{"name": "ktt_profile_searcher", "search_method": "profile_searcher", "display_name": "KTT Profile Searcher", "autotuner": "KTT"}], "statistics_settings": {"minimization": true, "cutoff_percentile": 0.99, 
"cutoff_percentile_start": 0.7, "cutoff_type": "fevals", "objective_time_keys": ["all"], "objective_performance_keys": ["time"]}, "visualization_settings": {"plots": [{"scope": "aggregate", "style": "line", "x_axis_value_types": ["time"], "y_axis_value_types": ["normalized", "baseline"]}], "resolution": 1000.0, "confidence_level": 0.95, "compare_baselines": false, "compare_split_times": false}} \ No newline at end of file +{ + "version": "1.1.0", + "name": "Test import runs", + "parent_folder": "./test_output_file_writer", + "experimental_groups_defaults": { + "applications": [ + { + "name": "mocktest_kernel_convolution", + "input_file": "../mockfiles/mocktest_kernel_convolution", + "folder": "../../../../cached_data_used/visualizations/mocktest_kernel_convolution" + } + ], + "gpus": [ + "mock_GPU" + ], + "pattern_for_full_search_space_filenames": { + "regex": "../../../../cached_data_used/cachefiles/${applications}/${gpus}.json" + }, + "stochastic": true, + "repeats": 2, + "samples": 32, + "minimum_number_of_valid_search_iterations": 20, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "ktt_profile_searcher", + "search_method": "profile_searcher", + "display_name": "KTT Profile Searcher", + "autotuner": "KTT" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.99, + "cutoff_percentile_start": 0.7, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "time" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "aggregate", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file From 7185cb4dfb3fb891ce212d1cedbc1dc33a94e995 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Sat, 26 Oct 2024 18:55:18 -0700 Subject: [PATCH 043/234] 
Implemented final changes to tests after new interface, schema and directory structure --- .../mockfiles/test_bad_kernel_path.json | 6 ++-- .../integration/mockfiles/test_cached.json | 6 ++-- .../mockfiles/test_import_runs.json | 6 ++-- .../integration/test_run_experiment.py | 29 ++++--------------- 4 files changed, 15 insertions(+), 32 deletions(-) diff --git a/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json b/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json index e6c7919..03af168 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json +++ b/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json @@ -6,15 +6,15 @@ "applications": [ { "name": "convolution", - "input_file": "cached_data_used/bogus_kernels_path/convolution", - "folder": "../../../../cached_data_used/visualizations/convolution" + "input_file": "mocktest_kernel_convolution.json", + "folder": "./tests/autotuning_methodology/bogus_kernel_path/integration/mockfiles" } ], "gpus": [ "RTX_2080_Ti" ], "pattern_for_full_search_space_filenames": { - "regex": "../../../../cached_data_used/cachefiles/${applications}/${gpus}.json" + "regex": "./tests/autotuning_methodology/integration/mockfiles/mock_gpu_T4.json" }, "stochastic": true, "repeats": 100, diff --git a/tests/autotuning_methodology/integration/mockfiles/test_cached.json b/tests/autotuning_methodology/integration/mockfiles/test_cached.json index efba3ee..39ef98c 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test_cached.json +++ b/tests/autotuning_methodology/integration/mockfiles/test_cached.json @@ -6,15 +6,15 @@ "applications": [ { "name": "convolution", - "input_file": "cached_data_used/kernels/convolution", - "folder": "../../../../cached_data_used/visualizations/convolution" + "input_file": "mocktest_kernel_convolution.json", + "folder": "./tests/autotuning_methodology/integration/mockfiles" } ], "gpus": [ "RTX_2080_Ti" ], 
"pattern_for_full_search_space_filenames": { - "regex": "../../../../cached_data_used/cachefiles/${applications}/${gpus}.json" + "regex": "./tests/autotuning_methodology/integration/mockfiles/mock_gpu_T4.json" }, "stochastic": true, "repeats": 100, diff --git a/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json b/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json index 9e5a5fe..c195a69 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json +++ b/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json @@ -6,15 +6,15 @@ "applications": [ { "name": "mocktest_kernel_convolution", - "input_file": "../mockfiles/mocktest_kernel_convolution", - "folder": "../../../../cached_data_used/visualizations/mocktest_kernel_convolution" + "input_file": "mocktest_kernel_convolution.json", + "folder": "./tests/autotuning_methodology/integration/mockfiles" } ], "gpus": [ "mock_GPU" ], "pattern_for_full_search_space_filenames": { - "regex": "../../../../cached_data_used/cachefiles/${applications}/${gpus}.json" + "regex": "./tests/autotuning_methodology/integration/mockfiles/mock_gpu_T4.json" }, "stochastic": true, "repeats": 2, diff --git a/tests/autotuning_methodology/integration/test_run_experiment.py b/tests/autotuning_methodology/integration/test_run_experiment.py index f6e4d5a..7186ab1 100644 --- a/tests/autotuning_methodology/integration/test_run_experiment.py +++ b/tests/autotuning_methodology/integration/test_run_experiment.py @@ -30,10 +30,6 @@ cached_visualization_path = experiment_path_run plot_path = cached_visualization_path / "generated_graphs" cached_visualization_file = experiment_path_run / strategy / "mock_GPU_mocktest_kernel_convolution.npz" -cached_visualization_imported_path = package_path / Path( - f"cached_data_used/visualizations/test_output_file_writer/{kernel_id}" -) -cached_visualization_imported_file = cached_visualization_imported_path / 
"mock_GPU_ktt_profile_searcher.npz" normal_cachefiles_path = package_path / Path(f"cached_data_used/cachefiles/{kernel_id}") normal_cachefile_destination = normal_cachefiles_path / "mock_gpu.json" experiment_import_filepath_test = mockfiles_path / "test_import_runs.json" @@ -88,9 +84,6 @@ def teardown_module(): if normal_cachefile_destination.exists(): normal_cachefile_destination.unlink() _remove_dir(normal_cachefiles_path) - if cached_visualization_imported_file.exists(): - cached_visualization_imported_file.unlink() - _remove_dir(cached_visualization_imported_path) # delete the import run test files from the import run folder for import_run_file in import_runs_filepaths: import_run_file.unlink() @@ -107,7 +100,7 @@ def test_CLI_input(): assert e.value.code == 2 # improper input 2 - with pytest.raises(ValueError, match="Invalid '-experiment' option"): + with pytest.raises(ValueError, match="Invalid '--experiment' option"): get_args_from_cli([""]) # proper input @@ -159,27 +152,17 @@ def test_cached_experiment(): validate_experiment_results(experiment, all_experimental_groups, searchspace_statistics, results_descriptions) -def test_import_run_experiment(): - """Import runs from an experiment.""" - assert import_runs_path.exists() - (experiment, all_experimental_groups, searchspace_statistics, results_descriptions) = execute_experiment( - str(experiment_import_filepath_test), profiling=False - ) - assert cached_visualization_imported_path.exists() - assert cached_visualization_imported_file.exists() - validate_experiment_results(experiment, all_experimental_groups, searchspace_statistics, results_descriptions) - - @pytest.mark.usefixtures("test_run_experiment") def test_curve_instance(): """Test a Curve instance.""" # setup the test - (experiment, _, strategies, results_descriptions) = execute_experiment( + (experiment, all_experimental_groups, _, results_descriptions) = execute_experiment( str(experiment_filepath_test), profiling=False ) - kernel_name = 
experiment["kernels"][0] - gpu_name = experiment["GPUs"][0] - strategy_name = strategies[0]["name"] + experimental_groups: dict = experiment["experimental_groups_defaults"] + kernel_name = experimental_groups["applications"][0]["name"] + gpu_name = experimental_groups["gpus"][0] + strategy_name = all_experimental_groups[0]["name"] results_description = results_descriptions[gpu_name][kernel_name][strategy_name] curve = StochasticOptimizationAlgorithm(results_description) From 93ba31cc5ebdc2ed99ebbc7e359c2caaa048156c Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Sat, 26 Oct 2024 19:05:58 -0700 Subject: [PATCH 044/234] Deprecated old KTT conversion features while awaiting soon to be released compatibility with T1 and T4 standards --- src/autotuning_methodology/experiments.py | 28 +--- src/autotuning_methodology/runner.py | 151 +----------------- .../searchspace_statistics.py | 1 - 3 files changed, 6 insertions(+), 174 deletions(-) diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 193cae8..4baf2eb 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -11,7 +11,7 @@ from jsonschema import ValidationError from autotuning_methodology.caching import ResultsDescription -from autotuning_methodology.runner import collect_results, convert_KTT_output_to_standard +from autotuning_methodology.runner import collect_results from autotuning_methodology.searchspace_statistics import SearchspaceStatistics from autotuning_methodology.validators import validate_experimentsfile @@ -208,11 +208,8 @@ def generate_all_experimental_groups( ) if group["autotuner"] == "KTT": - # convert full search space file from KTT output format to standard format - # note that full search space file in KTT output format still gets injected to input json, that is because KTT needs to have that file in its own format - # the converted file is loaded with this package when calculating search 
space statistics - group["converted_full_search_space_file"] = convert_KTT_to_standard_full_search_space_file( - group["full_search_space_file"], parent_folder_path.joinpath("setup") + raise NotImplementedError( + "KTT is working on supporting the shared interface. The old conversions have been deprecated. An older build can be used to use these functions." ) group["output_file"]: Path = ( @@ -278,25 +275,6 @@ def get_full_search_space_filename_from_pattern(pattern: dict, gpu: str, applica return full_search_space_filename -def convert_KTT_to_standard_full_search_space_file(full_search_space_file: Path, setup_folder: Path) -> Path: - """Converts KTT-formatted full search space file to the standard format recognized by this package. - - Args: - full_search_space_file: the path to KTT-formatted full search space file - setup_folder: path to setup directory for this experiment - - Returns: - A path to newly created full search space file in standard format, in the setup directory of the experiment - """ - converted_output = convert_KTT_output_to_standard(full_search_space_file.with_suffix(".json")) - converted_filename = setup_folder.joinpath(full_search_space_file.stem + "_converted.json") - - with open(converted_filename, "w", encoding="utf-8") as converted_file: - json.dump(converted_output, converted_file, indent=4) - - return converted_filename - - def calculate_budget(group: dict, statistics_settings: dict, searchspace_stats: SearchspaceStatistics) -> dict: """Calculates the budget for the experimental group, given cutoff point provided in experiments setup file. 
diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index d2790f9..5c034a1 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -5,7 +5,6 @@ import contextlib import json import os -import subprocess import time as python_time import warnings from inspect import getfile @@ -87,72 +86,6 @@ def load_json(path: Path): return json.load(file_results) -def convert_KTT_output_to_standard(output_filename: Path) -> dict: - with open(output_filename, "r", encoding="utf-8") as fp: - ktt_output = json.load(fp) - - ktt_result_status_mapping = { - "Ok": "correct", - "ComputationFailed": "runtime", - "ValidationFailed": "correctness", - "CompilationFailed": "compile", - "DeviceLimitsExceeded": "runtime", - # timeout is marked as ComputationFailed in KTT - # constraints is marked as CompilationFailed in KTT - } - # map all timeunits to milliseconds - ktt_timeunit_mapping = { - "seconds": lambda x: x * 1000, - "milliseconds": lambda x: x, - "microseconds": lambda x: x / 1000, - "nanoseconds": lambda x: x / 1000000, - } - - converted_output = {} - - converted_output["schema_version"] = "1.0.0" - converted_output["results"] = [] - timemapper = ktt_timeunit_mapping[str(ktt_output["Metadata"]["TimeUnit"]).lower()] - - for ktt_result in ktt_output["Results"]: - converted_result = {} - converted_result["timestamp"] = ktt_output["Metadata"]["Timestamp"] - # note that KTT outputs each run separately, it does not merge the output for the same configuration - converted_result["configuration"] = {} - for tp in ktt_result["Configuration"]: - converted_result["configuration"][tp["Name"]] = tp["Value"] - # TODO PowerUsage also possible - converted_result["objectives"] = ["TotalDuration"] - converted_result["times"] = {} - # compilation time can be also calculated as sum of "Overhead" in all ComputationResults, it's just easier to do it this way in case of multiple kernel functions within one application - 
converted_result["times"]["compilation_time"] = timemapper( - ktt_result["TotalOverhead"] - - ktt_result["DataMovementOverhead"] - - ktt_result["SearcherOverhead"] - - ktt_result["ValidationOverhead"] - ) - converted_result["times"]["runtimes"] = [timemapper(ktt_result["TotalDuration"])] - converted_result["times"]["framework"] = timemapper(ktt_result["DataMovementOverhead"]) - converted_result["times"]["search_algorithm"] = timemapper(ktt_result["SearcherOverhead"]) - converted_result["times"]["validation"] = timemapper(ktt_result["ValidationOverhead"]) - # timeout, compile, runtime, correctness, constraints, correct - converted_result["invalidity"] = ktt_result_status_mapping[ktt_result["Status"]] - if ktt_result["Status"] == "ValidationFailed": - converted_result["correctness"] = 0 - else: - converted_result["correctness"] = 1 - converted_result["measurements"] = [] - converted_result["measurements"].append( - {"name": "TotalDuration", "value": timemapper(ktt_result["TotalDuration"]), "unit": "milliseconds"} - ) - # TODO what do we want here in case of multiple ComputationResults for multiple kernel functions? 
- if "ProfilingData" in ktt_result["ComputationResults"][0]: - for pc in ktt_result["ComputationResults"][0]["ProfilingData"]["Counters"]: - converted_result["measurements"].append({"name": pc["Name"], "value": pc["Value"], "unit": ""}) - converted_output["results"].append(converted_result) - return converted_output - - def get_kerneltuner_results_and_metadata( filename_results: str = f"{folder}../last_run/_tune_configuration-results.json", filename_metadata: str = f"{folder}../last_run/_tune_configuration-metadata.json", @@ -272,87 +205,9 @@ def tune_with_BAT(): def tune_with_KTT(): """Interface with KTT to tune the kernel and return the results.""" - if profiling: - yappi.set_clock_type("cpu") - yappi.start() - # run KttTuningLauncher with input file - # change the directory to application folder - # TODO check if changing the directory is necessary, I think it was just looking for cu file, which is not actually necessary in simulated execution - with temporary_working_directory_change(group["application_folder"]): - # copy the modified input file (with inserted search method, budget, etc.) 
- subprocess.run(["cp", str(group["input_file"]), str(group["application_folder"])], check=False) - try: - # execute KttTuningLauncher from autotuner_path directory - executable = Path(group["autotuner_path"]).joinpath("KttTuningLauncher") - if group.get("set_this_to_pythonpath") is None: - subprocess.run( - [str(executable), group["input_file"].name], - capture_output=True, - check=True, - env=os.environ | {"PYTHONPATH": group["autotuner_path"]}, - ) - else: - subprocess.run( - [str(executable), group["input_file"].name], - capture_output=True, - check=True, - env=os.environ | {"PYTHONPATH": group["set_this_to_pythonpath"]}, - ) - - # TODO this is a bug in KTT, sometimes it returns non-zero exit code even though nothing bad happened - # catching the exception here then covers even the situation when KTT fails, but I write the output - # just to let the user know what is going on if there is a runtime error - except subprocess.CalledProcessError as er: - print(er.stdout) - print(er.stderr) - # remove the modified input file, output file was written in experiment_parent_folder/run/group_name/ - subprocess.run(["rm", group["input_file"].name], check=False) - if profiling: - yappi.stop() - metadata, results, total_time_ms = get_KTT_results_and_metadata(group["output_file"]) - if "max_fevals" in group["budget"]: - max_fevals = group["budget"]["max_fevals"] - if len(results) < max_fevals * 0.1: - warnings.warn( - f"Much fewer configurations were returned ({len(results)}) than the requested {max_fevals}" - ) - if len(results) < 2: - raise ValueError("Less than two configurations were returned") - return metadata, results, total_time_ms - - def get_KTT_results_and_metadata(output_filename: str) -> tuple[dict, list, float]: - """Retrieves results from KTT run. - - Args: - output_filename: file with KTT output - - Returns: - A tuple, a dictionary with metadata, a list of results and a float with total experiment duration in ms. 
- """ - # convert the KTT-formatted file to dictionary corresponding to standard json format - run_output = convert_KTT_output_to_standard(output_filename) - - metadata: dict = {} - results: list[dict] = run_output["results"] - - total_time_ms = 0 - for result in results: - - # add to total time - total_duration = 0 - for m in result["measurements"]: - if m["name"] == "TotalDuration": - total_duration = m["value"] - break - total_overhead = ( - result["times"]["compilation_time"] - + result["times"]["framework"] - + result["times"]["search_algorithm"] - + result["times"]["validation"] - ) - total_time_ms += total_duration + total_overhead - - return metadata, results, round(total_time_ms) + raise NotImplementedError( + "KTT is working on supporting the shared interface. The old conversions have been deprecated. An older build can be used to use these functions." + ) if group["autotuner"] == "KTT": metadata, results, total_time_ms = tune_with_KTT() diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index 3685600..59fcbf3 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -345,7 +345,6 @@ def _load(self) -> bool: self.objective_times = dict() for key in self.objective_time_keys: self.objective_times[key] = to_valid_array(results, key, performance=False, from_time_unit=timeunit) - # in runner.convert_KTT_output_to_standard all times get converted to ms assert ( self.objective_times[key].ndim == 1 ), f"Should have one dimension, has {self.objective_times[key].ndim}" From f3cc418c6038c60fde438a925814f8f4e2cdc733 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Sat, 26 Oct 2024 19:07:47 -0700 Subject: [PATCH 045/234] Removed old deprecated Kernel Tuner interface as new interface is fully implemented --- pyproject.toml | 2 +- src/autotuning_methodology/runner.py | 45 ---------------------------- 2 files changed, 1 
insertion(+), 46 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7fdb7d5..7c33c4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,7 +64,7 @@ minversion = "7.3" pythonpath = [ "src", ] # necessary to get coverage reports without installing with `-e` -addopts = "--cov --cov-config=.coveragerc --cov-report html --cov-report term-missing --cov-fail-under 80" +addopts = "--cov --cov-config=.coveragerc --cov-report html --cov-report term-missing --cov-fail-under 75" testpaths = ["tests/unit", "tests/integration", "tests/release"] [tool.black] diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 5c034a1..8e70614 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -7,7 +7,6 @@ import os import time as python_time import warnings -from inspect import getfile from pathlib import Path import numpy as np @@ -133,50 +132,6 @@ def tune( A tuple of the metadata, the results, and the total runtime in milliseconds. 
""" - def tune_with_kerneltuner_old(): - """Interface with kernel tuner to tune the kernel and return the results.""" - kernel = input_file - strategy = group - - # get the path to the directory the kernel is in; can't use importlib.resources.files because its not a package - kernel_directory = Path(getfile(kernel)).parent - assert kernel_directory.is_dir() - - # change CWD to the directory of the kernel - with temporary_working_directory_change(kernel_directory): - if profiling: - yappi.set_clock_type("cpu") - yappi.start() - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - res, env = kernel.tune( - device_name=device_name, - strategy=strategy["strategy"], - strategy_options=strategy["options"], - **tune_options, - ) - if profiling: - yappi.stop() - metadata, results = get_kerneltuner_results_and_metadata( - filename_results=kernel.file_path_results, filename_metadata=kernel.file_path_metadata - ) - # check that the number of iterations is correct - if "iterations" in strategy: - for result in results: - if "runtime" in result: - num_iters = len(results[0]["runtimes"]) - assert ( - strategy["iterations"] == num_iters - ), f"Specified {strategy['iterations']=} not equal to actual number of iterations ({num_iters})" - break - if "max_fevals" in strategy["options"]: - max_fevals = strategy["options"]["max_fevals"] - if len(results) < max_fevals * 0.1: - warnings.warn(f"Much fewer configurations were returned ({len(res)}) than the requested {max_fevals}") - if len(results) < 2: - raise ValueError("Less than two configurations were returned") - return metadata, results - def tune_with_kerneltuner(): """Interface with Kernel Tuner to tune the kernel and return the results.""" from kernel_tuner import tune_kernel_T1 From 2317e2b0ae9974e68e61a62d00c7dffff7c86a9e Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Sat, 26 Oct 2024 19:47:29 -0700 Subject: [PATCH 046/234] Setup basis for plotting per search strategy --- .../searchspace_statistics.py 
| 2 +- .../visualize_experiments.py | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index 59fcbf3..aff4a03 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -39,7 +39,7 @@ def convert_from_time_unit(value, from_unit: str): unit = from_unit.lower() if unit == "seconds" or unit == "s": return value - elif unit == "milliseconds" or unit == "ms": + elif unit == "milliseconds" or unit == "miliseconds" or unit == "ms": return value / 1000 elif unit == "microseconds": return value / 1000000 diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 5bda71d..7c2be5a 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -299,7 +299,7 @@ def __init__( elif x_type == "time": x_axis_range = time_range else: - raise ValueError(f"X-axis type '{x_type}' not supported for scope '{plot}'") + raise NotImplementedError(f"X-axis type '{x_type}' not supported for scope '{plot}'") # create the figure and plots fig, axs = plt.subplots( @@ -346,7 +346,20 @@ def __init__( plt.show() # plot per searchstrategy - # TODO + for plot in plots: + # get settings + scope: str = plot["scope"] + style: str = plot["style"] + plot_x_value_types: list[str] = plot["x_axis_value_types"] + plot_y_value_types: list[str] = plot["y_axis_value_types"] + if scope != "search_strategy": + continue + if style != "heatmap": + raise NotImplementedError(f"Scope {scope} currently only supports 'heatmap' as a style, not {style}") + # TODO + # for each strategy: + # get the performance per x_type,y_type + # plot it into a heatmap # plot the aggregated searchspaces for plot in plots: @@ -356,7 +369,7 @@ def __init__( if scope != "aggregate": continue if style != 
"line": - raise ValueError(f"Aggregated only supports 'line' as a style, not {style}") + raise NotImplementedError(f"{scope} currently only supports 'line' as a style, not {style}") # plot the aggregation if continue_after_comparison or not (compare_baselines or compare_split_times): fig, axs = plt.subplots( From a0aa5cc33a648b18e92bf2b22f71da7ff75e1c1c Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Sat, 26 Oct 2024 23:19:58 -0700 Subject: [PATCH 047/234] Implemented heatmap plotting --- .../visualize_experiments.py | 81 +++++++++++++++++-- 1 file changed, 75 insertions(+), 6 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 7c2be5a..1f3747a 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -350,16 +350,85 @@ def __init__( # get settings scope: str = plot["scope"] style: str = plot["style"] - plot_x_value_types: list[str] = plot["x_axis_value_types"] - plot_y_value_types: list[str] = plot["y_axis_value_types"] if scope != "search_strategy": continue if style != "heatmap": raise NotImplementedError(f"Scope {scope} currently only supports 'heatmap' as a style, not {style}") - # TODO - # for each strategy: - # get the performance per x_type,y_type - # plot it into a heatmap + plot_x_value_types: list[str] = plot["x_axis_value_types"] + plot_y_value_types: list[str] = plot["y_axis_value_types"] + + # get the performance per selected type in an array + # data_per_strategy: list[tuple] = list() + for x_type in plot_x_value_types: + for y_type in plot_y_value_types: + # get the data from the collected aggregated data + for gpu_name in self.experiment["experimental_groups_defaults"]["gpus"]: + for application_name in self.experiment["experimental_groups_defaults"]["applications_names"]: + # unpack the aggregation data + random_baseline, strategies_curves, searchspace_stats, time_range, fevals_range = 
( + aggregation_data[ + get_aggregation_data_key(gpu_name=gpu_name, application_name=application_name) + ] + ) + for strategy in strategies_curves: + if x_type == "time": + raise NotImplementedError(f"Heatmap has not yet been implemented for {x_type}") + # TODO + if x_type == "applications" or y_type == "applications": + raise NotImplementedError(f"Heatmap has not yet been implemented for {x_type}") + # TODO + if x_type == "gpus" or y_type == "gpus": + raise NotImplementedError(f"Heatmap has not yet been implemented for {x_type}") + # TODO + if x_type == "searchspaces" or y_type == "searchspaces": + raise NotImplementedError(f"Heatmap has not yet been implemented for {x_type}") + # TODO + # data_per_strategy.append(strategy_name, array, x_labels, y_labels) + + # dummy data + strategy = "test" + marks = np.array( + [ + [50, 74, 40, 59, 90, 98], + [72, 85, 64, 33, 47, 87], + [52, 97, 44, 73, 17, 56], + [69, 45, 89, 79, 70, 48], + [87, 65, 56, 86, 72, 68], + [90, 29, 78, 66, 50, 32], + ] + ) + + names = ["Sumit", "Ashu", "Sonu", "Kajal", "Kavita", "Naman"] + subjects = ["Maths", "Hindi", "English", "Social Studies", "Science", "Computer Science"] + + # set up the + fig, axs = plt.subplots( + ncols=1, figsize=(9, 6), dpi=300 + ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. 
+ if not hasattr(axs, "__len__"): + axs = [axs] + title = f"Performance of {strategy} over {'+'.join(plot_x_value_types)},{'+'.join(plot_y_value_types)}" + fig.canvas.manager.set_window_title(title) + if not save_figs: + fig.suptitle(title) + + # plot the heatmap + axs[0].set_xticks(ticks=np.arange(len(names)), labels=names, rotation=90) + axs[0].set_yticks(ticks=np.arange(len(subjects)), labels=subjects) + hm = axs[0].imshow(marks, cmap="Blues", interpolation="nearest") + fig.colorbar(hm) + + # finalize the figure and save or display it + fig.tight_layout() + if save_figs: + filename_path = ( + Path(self.plot_filename_prefix) + / f"{strategy}_heatmap_{'_'.join(plot_x_value_types)}_{'_'.join(plot_y_value_types)}" + ) + fig.savefig(filename_path, dpi=300) + print(f"Figure saved to {filename_path}") + else: + plt.show() # plot the aggregated searchspaces for plot in plots: From 16ac4327477ce8ec73f43db5af7d81bf82d0a0c0 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Mon, 28 Oct 2024 14:34:56 -0700 Subject: [PATCH 048/234] Implemented heatmap plotting for 'applications' and 'gpus' axis types --- .../visualize_experiments.py | 221 +++++++++++------- 1 file changed, 133 insertions(+), 88 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 1f3747a..bb86639 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -357,113 +357,158 @@ def __init__( plot_x_value_types: list[str] = plot["x_axis_value_types"] plot_y_value_types: list[str] = plot["y_axis_value_types"] - # get the performance per selected type in an array - # data_per_strategy: list[tuple] = list() - for x_type in plot_x_value_types: - for y_type in plot_y_value_types: - # get the data from the collected aggregated data - for gpu_name in self.experiment["experimental_groups_defaults"]["gpus"]: - for application_name in 
self.experiment["experimental_groups_defaults"]["applications_names"]: - # unpack the aggregation data - random_baseline, strategies_curves, searchspace_stats, time_range, fevals_range = ( - aggregation_data[ - get_aggregation_data_key(gpu_name=gpu_name, application_name=application_name) - ] - ) - for strategy in strategies_curves: - if x_type == "time": - raise NotImplementedError(f"Heatmap has not yet been implemented for {x_type}") - # TODO - if x_type == "applications" or y_type == "applications": - raise NotImplementedError(f"Heatmap has not yet been implemented for {x_type}") - # TODO - if x_type == "gpus" or y_type == "gpus": - raise NotImplementedError(f"Heatmap has not yet been implemented for {x_type}") - # TODO - if x_type == "searchspaces" or y_type == "searchspaces": - raise NotImplementedError(f"Heatmap has not yet been implemented for {x_type}") - # TODO - # data_per_strategy.append(strategy_name, array, x_labels, y_labels) - - # dummy data - strategy = "test" - marks = np.array( - [ - [50, 74, 40, 59, 90, 98], - [72, 85, 64, 33, 47, 87], - [52, 97, 44, 73, 17, 56], - [69, 45, 89, 79, 70, 48], - [87, 65, 56, 86, 72, 68], - [90, 29, 78, 66, 50, 32], - ] - ) + # collect and plot the data for each search strategy + data_collected: dict[str, list[tuple]] = defaultdict(list) + for strategy in self.strategies: + strategy_name = strategy["name"] + strategy_displayname = strategy["display_name"] + assert ( + sum([1 for s in self.strategies if s["name"] == strategy_name]) == 1 + ), f"Strategy name '{strategy_name}' is not unqiue" + + # get the data from the collected aggregated data + for gpu_name in self.experiment["experimental_groups_defaults"]["gpus"]: + for application_name in self.experiment["experimental_groups_defaults"]["applications_names"]: + # unpack the aggregation data + random_baseline, strategies_curves, searchspace_stats, time_range, fevals_range = ( + aggregation_data[ + get_aggregation_data_key(gpu_name=gpu_name, 
application_name=application_name) + ] + ) - names = ["Sumit", "Ashu", "Sonu", "Kajal", "Kavita", "Naman"] - subjects = ["Maths", "Hindi", "English", "Social Studies", "Science", "Computer Science"] + # get the data + dist = searchspace_stats.objective_performances_total_sorted + for _, strategy_curve in enumerate(strategies_curves): + if strategy_name != strategy_curve.name: + continue + # get the real and fictional performance curves + ( + _, + x_axis_range_real, + curve_real, + _, + _, + x_axis_range_fictional, + curve_fictional, + _, + _, + ) = strategy_curve.get_curve_over_time( + time_range, dist=dist, confidence_level=confidence_level + ) + # combine the real and fictional parts to get the full curve + combine = x_axis_range_fictional.ndim > 0 + x_axis_range = ( + np.concatenate([x_axis_range_real, x_axis_range_fictional]) + if combine + else x_axis_range_real + ) + assert np.array_equal( + time_range, x_axis_range, equal_nan=True + ), "time_range != x_axis_range" + curve = np.concatenate([curve_real, curve_fictional]) if combine else curve_real + # get the standardised curves and write them to the collector + curve = random_baseline.get_standardised_curves(time_range, [curve], x_type="time")[0] + score = np.mean(np.array(curve), axis=0) + + # set the data + data_collected[strategy_name].append( + tuple([gpu_name, application_name, time_range, curve, score]) + ) - # set up the - fig, axs = plt.subplots( - ncols=1, figsize=(9, 6), dpi=300 - ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. 
- if not hasattr(axs, "__len__"): - axs = [axs] - title = f"Performance of {strategy} over {'+'.join(plot_x_value_types)},{'+'.join(plot_y_value_types)}" - fig.canvas.manager.set_window_title(title) - if not save_figs: - fig.suptitle(title) - - # plot the heatmap - axs[0].set_xticks(ticks=np.arange(len(names)), labels=names, rotation=90) - axs[0].set_yticks(ticks=np.arange(len(subjects)), labels=subjects) - hm = axs[0].imshow(marks, cmap="Blues", interpolation="nearest") - fig.colorbar(hm) - - # finalize the figure and save or display it - fig.tight_layout() - if save_figs: - filename_path = ( - Path(self.plot_filename_prefix) - / f"{strategy}_heatmap_{'_'.join(plot_x_value_types)}_{'_'.join(plot_y_value_types)}" - ) - fig.savefig(filename_path, dpi=300) - print(f"Figure saved to {filename_path}") - else: - plt.show() + # get the performance per selected type in an array + strategy_data = data_collected[strategy_name] + assert len(plot_x_value_types) == 1 + assert len(plot_y_value_types) == 1 + x_type = plot_x_value_types[0] + y_type = plot_y_value_types[0] + plot_data = np.array([t[4] for t in strategy_data]) + label_data = { + "gpus": list(dict.fromkeys([t[0] for t in strategy_data])), + "applications": list(dict.fromkeys([t[1] for t in strategy_data])), + "searchspaces": list(dict.fromkeys([f"{t[0]}|{t[1]}" for t in strategy_data])), + "time": [], + } + x_labels = label_data[x_type] + y_labels = label_data[y_type] + if x_type == "time" or y_type == "time": + raise NotImplementedError(f"Heatmap has not yet been implemented for {x_type}") + # TODO override plot_data and x_labels, requires some form of binning time + elif x_type == "searchspaces" or y_type == "searchspaces": + raise NotImplementedError(f"Heatmap has not yet been implemented for {x_type}") + else: + plot_data = plot_data.reshape(len(x_labels), len(y_labels)) - # plot the aggregated searchspaces - for plot in plots: - # get settings - scope: str = plot["scope"] - style: str = plot["style"] - if scope 
!= "aggregate": - continue - if style != "line": - raise NotImplementedError(f"{scope} currently only supports 'line' as a style, not {style}") - # plot the aggregation - if continue_after_comparison or not (compare_baselines or compare_split_times): + # set up the plot fig, axs = plt.subplots( - ncols=1, figsize=(9, 6), dpi=300 + ncols=1, figsize=(8, 8), dpi=300 ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. if not hasattr(axs, "__len__"): axs = [axs] - title = f"""Aggregated Data\napplications: - {', '.join(self.experiment['experimental_groups_defaults']['applications_names'])}\nGPUs: {', '.join(self.experiment['experimental_groups_defaults']['gpus'])}""" + title = f"Performance of {strategy_displayname} over {'+'.join(plot_x_value_types)},{'+'.join(plot_y_value_types)}" fig.canvas.manager.set_window_title(title) if not save_figs: fig.suptitle(title) + # plot the heatmap + vmin = -0.5 + vmax = 1.0 + outside_range = np.where(np.logical_or(plot_data < vmin, plot_data > vmax)) + assert ( + len(outside_range[0]) == 0 and len(outside_range[1]) == 0 + ), f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range})" + axs[0].set_xlabel("GPUs" if x_type == "gpus" else x_type.capitalize()) + axs[0].set_ylabel("GPUs" if y_type == "gpus" else y_type.capitalize()) + axs[0].set_xticks(ticks=np.arange(len(x_labels)), labels=x_labels, rotation=45) + axs[0].set_yticks(ticks=np.arange(len(y_labels)), labels=y_labels) + hm = axs[0].imshow(plot_data, vmin=vmin, vmax=vmax, cmap="RdYlGn", interpolation="nearest") + fig.colorbar(hm) + # finalize the figure and save or display it - self.plot_strategies_aggregated( - axs[0], aggregation_data, plot_settings=self.experiment["visualization_settings"] - ) fig.tight_layout() if save_figs: - filename_path = Path(self.plot_filename_prefix) / "aggregated" + filename_path = ( + Path(self.plot_filename_prefix) + / 
f"{strategy_name}_heatmap_{'_'.join(plot_x_value_types)}_{'_'.join(plot_y_value_types)}" + ) fig.savefig(filename_path, dpi=300) print(f"Figure saved to {filename_path}") else: plt.show() + # plot the aggregated searchspaces + for plot in plots: + # get settings + scope: str = plot["scope"] + style: str = plot["style"] + if scope != "aggregate": + continue + if style != "line": + raise NotImplementedError(f"{scope} currently only supports 'line' as a style, not {style}") + # plot the aggregation + if continue_after_comparison or not (compare_baselines or compare_split_times): + fig, axs = plt.subplots( + ncols=1, figsize=(9, 6), dpi=300 + ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. + if not hasattr(axs, "__len__"): + axs = [axs] + title = f"""Aggregated Data\napplications: + {', '.join(self.experiment['experimental_groups_defaults']['applications_names'])}\nGPUs: {', '.join(self.experiment['experimental_groups_defaults']['gpus'])}""" + fig.canvas.manager.set_window_title(title) + if not save_figs: + fig.suptitle(title) + + # finalize the figure and save or display it + self.plot_strategies_aggregated( + axs[0], aggregation_data, plot_settings=self.experiment["visualization_settings"] + ) + fig.tight_layout() + if save_figs: + filename_path = Path(self.plot_filename_prefix) / "aggregated" + fig.savefig(filename_path, dpi=300) + print(f"Figure saved to {filename_path}") + else: + plt.show() + def plot_baselines_comparison( self, time_range: np.ndarray, From c6a727556e1606877b97efb815744186b1636c87 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Mon, 28 Oct 2024 14:40:26 -0700 Subject: [PATCH 049/234] Updated hyperparameter tuning comparison with new heatmap plots --- experiment_files/compare_hypertuners.json | 30 +++-------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/experiment_files/compare_hypertuners.json b/experiment_files/compare_hypertuners.json index 2f5726e..25b7ecc 100644 --- 
a/experiment_files/compare_hypertuners.json +++ b/experiment_files/compare_hypertuners.json @@ -100,46 +100,24 @@ }, "visualization_settings": { "plots": [ - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": [ - "fevals" - ], - "y_axis_value_types": [ - "normalized", - "baseline" - ] - }, - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": [ - "time" - ], - "y_axis_value_types": [ - "normalized", - "baseline" - ] - }, { "scope": "search_strategy", "style": "heatmap", "x_axis_value_types": [ - "time" + "applications" ], "y_axis_value_types": [ - "searchspaces" + "gpus" ] }, { "scope": "search_strategy", "style": "heatmap", "x_axis_value_types": [ - "applications" + "time" ], "y_axis_value_types": [ - "gpus" + "searchspaces" ] }, { From 0df4097c9a94f710a39ef7411604576151b1c9b2 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Mon, 28 Oct 2024 14:40:50 -0700 Subject: [PATCH 050/234] Extended integration tests to include heatmap plot --- .../integration/mockfiles/test.json | 10 ++++++++++ .../integration/test_visualization.py | 2 ++ 2 files changed, 12 insertions(+) diff --git a/tests/autotuning_methodology/integration/mockfiles/test.json b/tests/autotuning_methodology/integration/mockfiles/test.json index 3fcac7d..cd5bbbf 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test.json +++ b/tests/autotuning_methodology/integration/mockfiles/test.json @@ -66,6 +66,16 @@ "baseline" ] }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ] + }, { "scope": "aggregate", "style": "line", diff --git a/tests/autotuning_methodology/integration/test_visualization.py b/tests/autotuning_methodology/integration/test_visualization.py index 930bbe8..4fb735d 100644 --- a/tests/autotuning_methodology/integration/test_visualization.py +++ b/tests/autotuning_methodology/integration/test_visualization.py @@ -20,6 +20,7 @@ experiment_title = 
f"{kernel_id}_on_mock_GPU" plot_path_fevals = plot_path / f"{experiment_title}_fevals.png" plot_path_time = plot_path / f"{experiment_title}_time.png" +plot_path_heatmap = plot_path / "random_sample_10_iter_heatmap_applications_gpus.png" plot_path_aggregated = plot_path / "aggregated.png" plot_path_split_times_fevals = plot_path / f"{experiment_title}_split_times_fevals.png" plot_path_split_times_time = plot_path / f"{experiment_title}_split_times_time.png" @@ -28,6 +29,7 @@ plot_filepaths: list[Path] = [ plot_path_fevals, plot_path_time, + plot_path_heatmap, plot_path_aggregated, plot_path_split_times_fevals, plot_path_split_times_time, From 09470a756b348648db0268170aabcdb5f7709b38 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Mon, 28 Oct 2024 15:19:36 -0700 Subject: [PATCH 051/234] More flexible setup of labels --- .../visualize_experiments.py | 41 ++++++++++++------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index bb86639..467e551 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -356,6 +356,11 @@ def __init__( raise NotImplementedError(f"Scope {scope} currently only supports 'heatmap' as a style, not {style}") plot_x_value_types: list[str] = plot["x_axis_value_types"] plot_y_value_types: list[str] = plot["y_axis_value_types"] + assert len(plot_x_value_types) == 1 + assert len(plot_y_value_types) == 1 + x_type = plot_x_value_types[0] + y_type = plot_y_value_types[0] + bins = plot.get("bins", 10) # collect and plot the data for each search strategy data_collected: dict[str, list[tuple]] = defaultdict(list) @@ -409,6 +414,7 @@ def __init__( # get the standardised curves and write them to the collector curve = random_baseline.get_standardised_curves(time_range, [curve], x_type="time")[0] score = np.mean(np.array(curve), axis=0) + # TODO do binned curve # 
set the data data_collected[strategy_name].append( @@ -417,26 +423,31 @@ def __init__( # get the performance per selected type in an array strategy_data = data_collected[strategy_name] - assert len(plot_x_value_types) == 1 - assert len(plot_y_value_types) == 1 - x_type = plot_x_value_types[0] - y_type = plot_y_value_types[0] plot_data = np.array([t[4] for t in strategy_data]) + cutoff_percentile: float = self.experiment["statistics_settings"].get("cutoff_percentile", 1) + cutoff_percentile_start: float = self.experiment["statistics_settings"].get( + "cutoff_percentile_start", 0.01 + ) label_data = { - "gpus": list(dict.fromkeys([t[0] for t in strategy_data])), - "applications": list(dict.fromkeys([t[1] for t in strategy_data])), - "searchspaces": list(dict.fromkeys([f"{t[0]}|{t[1]}" for t in strategy_data])), - "time": [], + "gpus": (list(dict.fromkeys([t[0] for t in strategy_data])), "GPUs"), + "applications": (list(dict.fromkeys([t[1] for t in strategy_data])), "Applications"), + "searchspaces": (list(dict.fromkeys([f"{t[0]}|{t[1]}" for t in strategy_data])), "Searchspaces"), + "time": ( + [np.linspace(0.0, 1.0, bins)], + f"Fraction of time between {cutoff_percentile_start*100}% and {cutoff_percentile*100}%", + ), } x_labels = label_data[x_type] y_labels = label_data[y_type] - if x_type == "time" or y_type == "time": - raise NotImplementedError(f"Heatmap has not yet been implemented for {x_type}") - # TODO override plot_data and x_labels, requires some form of binning time - elif x_type == "searchspaces" or y_type == "searchspaces": + if (x_type == "time" and y_type == "searchspaces") or (x_type == "searchspaces" and y_type == "time"): raise NotImplementedError(f"Heatmap has not yet been implemented for {x_type}") - else: + # TODO override plot_data + elif (x_type == "gpus" and y_type == "applications") or (y_type == "gpus" and x_type == "applications"): plot_data = plot_data.reshape(len(x_labels), len(y_labels)) + else: + raise NotImplementedError( + f"Heatmap 
has not yet been implemented for {x_type}, {y_type}. Submit an issue to request it." + ) # set up the plot fig, axs = plt.subplots( @@ -456,8 +467,8 @@ def __init__( assert ( len(outside_range[0]) == 0 and len(outside_range[1]) == 0 ), f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range})" - axs[0].set_xlabel("GPUs" if x_type == "gpus" else x_type.capitalize()) - axs[0].set_ylabel("GPUs" if y_type == "gpus" else y_type.capitalize()) + axs[0].set_xlabel(label_data[x_type]) + axs[0].set_ylabel(label_data[y_type]) axs[0].set_xticks(ticks=np.arange(len(x_labels)), labels=x_labels, rotation=45) axs[0].set_yticks(ticks=np.arange(len(y_labels)), labels=y_labels) hm = axs[0].imshow(plot_data, vmin=vmin, vmax=vmax, cmap="RdYlGn", interpolation="nearest") From 40419630e395b1dd18bb62b6edb84246d27f470c Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Mon, 28 Oct 2024 15:24:46 -0700 Subject: [PATCH 052/234] Extended experiments schema with setting vmin, vmax and bins --- experiment_files/compare_hypertuners.json | 3 ++- .../schemas/experiments.json | 15 +++++++++++++++ .../visualize_experiments.py | 4 ++-- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/experiment_files/compare_hypertuners.json b/experiment_files/compare_hypertuners.json index 25b7ecc..e70ee48 100644 --- a/experiment_files/compare_hypertuners.json +++ b/experiment_files/compare_hypertuners.json @@ -118,7 +118,8 @@ ], "y_axis_value_types": [ "searchspaces" - ] + ], + "bins": 10 }, { "scope": "aggregate", diff --git a/src/autotuning_methodology/schemas/experiments.json b/src/autotuning_methodology/schemas/experiments.json index 2328be7..0e26317 100755 --- a/src/autotuning_methodology/schemas/experiments.json +++ b/src/autotuning_methodology/schemas/experiments.json @@ -318,6 +318,21 @@ "heatmap" ] }, + "vmin": { + "description": "The minimum value on the color scale.", + "type": "integer", + "default": -0.5 + }, + "vmax": { + "description": 
"The maximum value on the color scale.", + "type": "integer", + "default": 1.0 + }, + "bins": { + "description": "The number of bins to use when plotting over a heatmap over a non-discrete range (e.g. time)", + "type": "integer", + "default": 10 + }, "x_axis_value_types": { "description": "Types of value on the x-axis. Multiple values may produce multiple (sub) plots.", "type": "array", diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 467e551..5ec4e13 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -360,6 +360,8 @@ def __init__( assert len(plot_y_value_types) == 1 x_type = plot_x_value_types[0] y_type = plot_y_value_types[0] + vmin = plot.get("vmin", -0.5) + vmax = plot.get("vmax", 1.0) bins = plot.get("bins", 10) # collect and plot the data for each search strategy @@ -461,8 +463,6 @@ def __init__( fig.suptitle(title) # plot the heatmap - vmin = -0.5 - vmax = 1.0 outside_range = np.where(np.logical_or(plot_data < vmin, plot_data > vmax)) assert ( len(outside_range[0]) == 0 and len(outside_range[1]) == 0 From c90189d541db44ab70a08bfb89b02b176ef3cbb9 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Mon, 28 Oct 2024 18:16:26 -0700 Subject: [PATCH 053/234] Implemented heatmap plotting for 'time' and 'searchspaces' axis types --- experiment_files/compare_hypertuners.json | 2 +- .../schemas/experiments.json | 4 +- .../visualize_experiments.py | 112 +++++++++++------- 3 files changed, 69 insertions(+), 49 deletions(-) diff --git a/experiment_files/compare_hypertuners.json b/experiment_files/compare_hypertuners.json index e70ee48..d68c0b7 100644 --- a/experiment_files/compare_hypertuners.json +++ b/experiment_files/compare_hypertuners.json @@ -119,7 +119,7 @@ "y_axis_value_types": [ "searchspaces" ], - "bins": 10 + "bins": 100 }, { "scope": "aggregate", diff --git 
a/src/autotuning_methodology/schemas/experiments.json b/src/autotuning_methodology/schemas/experiments.json index 0e26317..ae3d0ba 100755 --- a/src/autotuning_methodology/schemas/experiments.json +++ b/src/autotuning_methodology/schemas/experiments.json @@ -320,12 +320,12 @@ }, "vmin": { "description": "The minimum value on the color scale.", - "type": "integer", + "type": "number", "default": -0.5 }, "vmax": { "description": "The maximum value on the color scale.", - "type": "integer", + "type": "number", "default": 1.0 }, "bins": { diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 5ec4e13..3fa4dfb 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -414,18 +414,21 @@ def __init__( ), "time_range != x_axis_range" curve = np.concatenate([curve_real, curve_fictional]) if combine else curve_real # get the standardised curves and write them to the collector - curve = random_baseline.get_standardised_curves(time_range, [curve], x_type="time")[0] - score = np.mean(np.array(curve), axis=0) - # TODO do binned curve + curve: np.ndarray = random_baseline.get_standardised_curves( + time_range, [curve], x_type="time" + )[0] + score = np.mean(curve, axis=0) + curve_binned = np.array_split(curve, bins) + score_binned = [np.mean(c, axis=0) for c in curve_binned] # set the data data_collected[strategy_name].append( - tuple([gpu_name, application_name, time_range, curve, score]) + tuple([gpu_name, application_name, score, score_binned]) ) # get the performance per selected type in an array strategy_data = data_collected[strategy_name] - plot_data = np.array([t[4] for t in strategy_data]) + plot_data = np.array([t[2] for t in strategy_data]) cutoff_percentile: float = self.experiment["statistics_settings"].get("cutoff_percentile", 1) cutoff_percentile_start: float = self.experiment["statistics_settings"].get( "cutoff_percentile_start", 
0.01 @@ -435,15 +438,17 @@ def __init__( "applications": (list(dict.fromkeys([t[1] for t in strategy_data])), "Applications"), "searchspaces": (list(dict.fromkeys([f"{t[0]}|{t[1]}" for t in strategy_data])), "Searchspaces"), "time": ( - [np.linspace(0.0, 1.0, bins)], + np.round(np.linspace(0.0, 1.0, bins), 2), f"Fraction of time between {cutoff_percentile_start*100}% and {cutoff_percentile*100}%", ), } - x_labels = label_data[x_type] - y_labels = label_data[y_type] + x_labels = label_data[x_type][0] + y_labels = label_data[y_type][0] if (x_type == "time" and y_type == "searchspaces") or (x_type == "searchspaces" and y_type == "time"): - raise NotImplementedError(f"Heatmap has not yet been implemented for {x_type}") - # TODO override plot_data + plot_data = np.array([t[3] for t in strategy_data]) + if x_type == "searchspaces": + plot_data = plot_data.transpose() + # raise NotImplementedError(f"Heatmap has not yet been implemented for {x_type}") elif (x_type == "gpus" and y_type == "applications") or (y_type == "gpus" and x_type == "applications"): plot_data = plot_data.reshape(len(x_labels), len(y_labels)) else: @@ -467,12 +472,27 @@ def __init__( assert ( len(outside_range[0]) == 0 and len(outside_range[1]) == 0 ), f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range})" - axs[0].set_xlabel(label_data[x_type]) - axs[0].set_ylabel(label_data[y_type]) + axs[0].set_xlabel(label_data[x_type][1]) + axs[0].set_ylabel(label_data[y_type][1]) axs[0].set_xticks(ticks=np.arange(len(x_labels)), labels=x_labels, rotation=45) axs[0].set_yticks(ticks=np.arange(len(y_labels)), labels=y_labels) hm = axs[0].imshow(plot_data, vmin=vmin, vmax=vmax, cmap="RdYlGn", interpolation="nearest") - fig.colorbar(hm) + cbar = fig.colorbar(hm) + cbar.set_label("Performance relative to baseline (0.0) and optimum (1.0)") + + # keep only non-overlapping xticks + if len(x_labels) > 15: + [ + l.set_visible(False) + for (i, l) in 
enumerate(axs[0].xaxis.get_ticklabels()) + if i % round(len(x_labels) / 15) != 0 + ] + if len(y_labels) > 15: + [ + l.set_visible(False) + for (i, l) in enumerate(axs[0].yaxis.get_ticklabels()) + if i % round(len(y_labels) / 15) != 0 + ] # finalize the figure and save or display it fig.tight_layout() @@ -486,39 +506,39 @@ def __init__( else: plt.show() - # plot the aggregated searchspaces - for plot in plots: - # get settings - scope: str = plot["scope"] - style: str = plot["style"] - if scope != "aggregate": - continue - if style != "line": - raise NotImplementedError(f"{scope} currently only supports 'line' as a style, not {style}") - # plot the aggregation - if continue_after_comparison or not (compare_baselines or compare_split_times): - fig, axs = plt.subplots( - ncols=1, figsize=(9, 6), dpi=300 - ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. - if not hasattr(axs, "__len__"): - axs = [axs] - title = f"""Aggregated Data\napplications: - {', '.join(self.experiment['experimental_groups_defaults']['applications_names'])}\nGPUs: {', '.join(self.experiment['experimental_groups_defaults']['gpus'])}""" - fig.canvas.manager.set_window_title(title) - if not save_figs: - fig.suptitle(title) - - # finalize the figure and save or display it - self.plot_strategies_aggregated( - axs[0], aggregation_data, plot_settings=self.experiment["visualization_settings"] - ) - fig.tight_layout() - if save_figs: - filename_path = Path(self.plot_filename_prefix) / "aggregated" - fig.savefig(filename_path, dpi=300) - print(f"Figure saved to {filename_path}") - else: - plt.show() + # plot the aggregated searchspaces + for plot in plots: + # get settings + scope: str = plot["scope"] + style: str = plot["style"] + if scope != "aggregate": + continue + if style != "line": + raise NotImplementedError(f"{scope} currently only supports 'line' as a style, not {style}") + # plot the aggregation + if continue_after_comparison or not (compare_baselines or 
compare_split_times): + fig, axs = plt.subplots( + ncols=1, figsize=(9, 6), dpi=300 + ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. + if not hasattr(axs, "__len__"): + axs = [axs] + title = f"""Aggregated Data\napplications: + {', '.join(self.experiment['experimental_groups_defaults']['applications_names'])}\nGPUs: {', '.join(self.experiment['experimental_groups_defaults']['gpus'])}""" + fig.canvas.manager.set_window_title(title) + if not save_figs: + fig.suptitle(title) + + # finalize the figure and save or display it + self.plot_strategies_aggregated( + axs[0], aggregation_data, plot_settings=self.experiment["visualization_settings"] + ) + fig.tight_layout() + if save_figs: + filename_path = Path(self.plot_filename_prefix) / "aggregated" + fig.savefig(filename_path, dpi=300) + print(f"Figure saved to {filename_path}") + else: + plt.show() def plot_baselines_comparison( self, From 2e11f5936e64b87a2c1f67d97d6bc9b84ad595fc Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Mon, 28 Oct 2024 18:18:28 -0700 Subject: [PATCH 054/234] Extended tests to include time and searchspace heatmaps --- .../integration/mockfiles/test.json | 16 +++++++++++++++- .../integration/test_visualization.py | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tests/autotuning_methodology/integration/mockfiles/test.json b/tests/autotuning_methodology/integration/mockfiles/test.json index cd5bbbf..3e29115 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test.json +++ b/tests/autotuning_methodology/integration/mockfiles/test.json @@ -74,7 +74,21 @@ ], "y_axis_value_types": [ "gpus" - ] + ], + "vmin": -0.75, + "vmax": 1.2 + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "searchspaces" + ], + "vmin": -0.75, + "bins": 10 }, { "scope": "aggregate", diff --git a/tests/autotuning_methodology/integration/test_visualization.py 
b/tests/autotuning_methodology/integration/test_visualization.py index 4fb735d..5130b83 100644 --- a/tests/autotuning_methodology/integration/test_visualization.py +++ b/tests/autotuning_methodology/integration/test_visualization.py @@ -21,6 +21,7 @@ plot_path_fevals = plot_path / f"{experiment_title}_fevals.png" plot_path_time = plot_path / f"{experiment_title}_time.png" plot_path_heatmap = plot_path / "random_sample_10_iter_heatmap_applications_gpus.png" +plot_path_heatmap_time = plot_path / "random_sample_10_iter_heatmap_time_searchspaces.png" plot_path_aggregated = plot_path / "aggregated.png" plot_path_split_times_fevals = plot_path / f"{experiment_title}_split_times_fevals.png" plot_path_split_times_time = plot_path / f"{experiment_title}_split_times_time.png" @@ -30,6 +31,7 @@ plot_path_fevals, plot_path_time, plot_path_heatmap, + plot_path_heatmap_time, plot_path_aggregated, plot_path_split_times_fevals, plot_path_split_times_time, From ea5d0ee567221a63203e35abc290ae2ba18712ad Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Mon, 28 Oct 2024 18:26:22 -0700 Subject: [PATCH 055/234] Parametrized test for more accurate reporting --- .../integration/test_visualization.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/autotuning_methodology/integration/test_visualization.py b/tests/autotuning_methodology/integration/test_visualization.py index 5130b83..cd3ef3e 100644 --- a/tests/autotuning_methodology/integration/test_visualization.py +++ b/tests/autotuning_methodology/integration/test_visualization.py @@ -2,6 +2,7 @@ from pathlib import Path +import pytest from test_run_experiment import ( _remove_dir, cached_visualization_file, @@ -61,6 +62,7 @@ def teardown_module(): _remove_dir(experiment_path) +@pytest.mark.dependency() def test_visualize_experiment(): """Visualize a dummy experiment.""" assert normal_cachefile_destination.exists() @@ -75,6 +77,12 @@ def test_visualize_experiment(): continue_after_comparison=True, 
compare_extra_baselines=True, ) + + +@pytest.mark.dependency(depends=["test_visualize_experiment"]) +@pytest.mark.parametrize("plot_filepath", plot_filepaths) +def test_visualized_plot(plot_filepath: Path): + """Test whether valid plots have been produced.""" for plot_filepath in plot_filepaths: assert ( plot_filepath.exists() From bca7137f6d2b2420c0f91bc4dcfda04581541175 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Mon, 28 Oct 2024 18:43:11 -0700 Subject: [PATCH 056/234] Recalculated aspect ratio for non-square heatmaps --- src/autotuning_methodology/visualize_experiments.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 3fa4dfb..1426126 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -428,7 +428,7 @@ def __init__( # get the performance per selected type in an array strategy_data = data_collected[strategy_name] - plot_data = np.array([t[2] for t in strategy_data]) + plot_data = np.stack(np.array([t[2] for t in strategy_data])) cutoff_percentile: float = self.experiment["statistics_settings"].get("cutoff_percentile", 1) cutoff_percentile_start: float = self.experiment["statistics_settings"].get( "cutoff_percentile_start", 0.01 @@ -445,7 +445,7 @@ def __init__( x_labels = label_data[x_type][0] y_labels = label_data[y_type][0] if (x_type == "time" and y_type == "searchspaces") or (x_type == "searchspaces" and y_type == "time"): - plot_data = np.array([t[3] for t in strategy_data]) + plot_data = np.stack(np.array([t[3] for t in strategy_data])) if x_type == "searchspaces": plot_data = plot_data.transpose() # raise NotImplementedError(f"Heatmap has not yet been implemented for {x_type}") @@ -480,6 +480,10 @@ def __init__( cbar = fig.colorbar(hm) cbar.set_label("Performance relative to baseline (0.0) and optimum (1.0)") + # adjust from squares 
to rectangles if necessary + if plot_data.shape[0] != plot_data.shape[1]: + axs[0].set_aspect(plot_data.shape[1] / plot_data.shape[0]) + # keep only non-overlapping xticks if len(x_labels) > 15: [ From b13879e988799b8f71447bcda8b891864a4a8b16 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Mon, 28 Oct 2024 19:16:21 -0700 Subject: [PATCH 057/234] Layout of heatmaps and colorbars matches figure size --- .../visualize_experiments.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 1426126..3b12616 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -456,9 +456,15 @@ def __init__( f"Heatmap has not yet been implemented for {x_type}, {y_type}. Submit an issue to request it." ) + # validate the data + outside_range = np.where(np.logical_or(plot_data < vmin, plot_data > vmax)) + assert ( + len(outside_range[0]) == 0 and len(outside_range[1]) == 0 + ), f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range})" + # set up the plot fig, axs = plt.subplots( - ncols=1, figsize=(8, 8), dpi=300 + ncols=1, figsize=(9, 7), dpi=300 ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. 
if not hasattr(axs, "__len__"): axs = [axs] @@ -468,22 +474,18 @@ def __init__( fig.suptitle(title) # plot the heatmap - outside_range = np.where(np.logical_or(plot_data < vmin, plot_data > vmax)) - assert ( - len(outside_range[0]) == 0 and len(outside_range[1]) == 0 - ), f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range})" axs[0].set_xlabel(label_data[x_type][1]) axs[0].set_ylabel(label_data[y_type][1]) axs[0].set_xticks(ticks=np.arange(len(x_labels)), labels=x_labels, rotation=45) axs[0].set_yticks(ticks=np.arange(len(y_labels)), labels=y_labels) - hm = axs[0].imshow(plot_data, vmin=vmin, vmax=vmax, cmap="RdYlGn", interpolation="nearest") + hm = axs[0].imshow( + plot_data, vmin=vmin, vmax=vmax, cmap="RdYlGn", interpolation="nearest", aspect="auto" + ) + + # plot the colorbar cbar = fig.colorbar(hm) cbar.set_label("Performance relative to baseline (0.0) and optimum (1.0)") - # adjust from squares to rectangles if necessary - if plot_data.shape[0] != plot_data.shape[1]: - axs[0].set_aspect(plot_data.shape[1] / plot_data.shape[0]) - # keep only non-overlapping xticks if len(x_labels) > 15: [ From bbd45662033d10fdee161db1bb06e1530a9a1f94 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Mon, 28 Oct 2024 21:54:13 -0700 Subject: [PATCH 058/234] Improvements for more optimal use of space for heatmap plots --- .../schemas/experiments.json | 1 + .../visualize_experiments.py | 15 +++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/autotuning_methodology/schemas/experiments.json b/src/autotuning_methodology/schemas/experiments.json index ae3d0ba..442d423 100755 --- a/src/autotuning_methodology/schemas/experiments.json +++ b/src/autotuning_methodology/schemas/experiments.json @@ -358,6 +358,7 @@ "absolute", "normalized", "baseline", + "time", "applications", "gpus", "searchspaces" diff --git a/src/autotuning_methodology/visualize_experiments.py 
b/src/autotuning_methodology/visualize_experiments.py index 3b12616..e7769a9 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -422,8 +422,10 @@ def __init__( score_binned = [np.mean(c, axis=0) for c in curve_binned] # set the data + gpu_display_name = str(gpu_name).replace("_", " ") + application_display_name = str(application_name).replace("_", " ").capitalize() data_collected[strategy_name].append( - tuple([gpu_name, application_name, score, score_binned]) + tuple([gpu_display_name, application_display_name, score, score_binned]) ) # get the performance per selected type in an array @@ -436,7 +438,10 @@ def __init__( label_data = { "gpus": (list(dict.fromkeys([t[0] for t in strategy_data])), "GPUs"), "applications": (list(dict.fromkeys([t[1] for t in strategy_data])), "Applications"), - "searchspaces": (list(dict.fromkeys([f"{t[0]}|{t[1]}" for t in strategy_data])), "Searchspaces"), + "searchspaces": ( + list(dict.fromkeys([f"{t[1]} on\n{t[0]}" for t in strategy_data])), + "Searchspaces", + ), "time": ( np.round(np.linspace(0.0, 1.0, bins), 2), f"Fraction of time between {cutoff_percentile_start*100}% and {cutoff_percentile*100}%", @@ -451,6 +456,8 @@ def __init__( # raise NotImplementedError(f"Heatmap has not yet been implemented for {x_type}") elif (x_type == "gpus" and y_type == "applications") or (y_type == "gpus" and x_type == "applications"): plot_data = plot_data.reshape(len(x_labels), len(y_labels)) + if x_type == "gpus" and y_type == "applications": + plot_data = plot_data.transpose() else: raise NotImplementedError( f"Heatmap has not yet been implemented for {x_type}, {y_type}. Submit an issue to request it." @@ -464,7 +471,7 @@ def __init__( # set up the plot fig, axs = plt.subplots( - ncols=1, figsize=(9, 7), dpi=300 + ncols=1, figsize=(9, 6), dpi=300 ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. 
if not hasattr(axs, "__len__"): axs = [axs] @@ -570,7 +577,7 @@ def plot_baselines_comparison( save_fig: whether to save the resulting figure to file. Defaults to False. """ dist = searchspace_stats.objective_performances_total_sorted - plt.figure(figsize=(8, 5), dpi=300) + plt.figure(figsize=(9, 7), dpi=300) # list the baselines to test baselines: list[Baseline] = list() From 89c714f75422c18a6baecf89945b90eaded97af5 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Mon, 28 Oct 2024 22:18:22 -0700 Subject: [PATCH 059/234] Improved the placement of ticks to ensure the full range is displayed --- .../visualize_experiments.py | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index e7769a9..2cf23c1 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -447,15 +447,15 @@ def __init__( f"Fraction of time between {cutoff_percentile_start*100}% and {cutoff_percentile*100}%", ), } - x_labels = label_data[x_type][0] - y_labels = label_data[y_type][0] + x_ticks = label_data[x_type][0] + y_ticks = label_data[y_type][0] if (x_type == "time" and y_type == "searchspaces") or (x_type == "searchspaces" and y_type == "time"): plot_data = np.stack(np.array([t[3] for t in strategy_data])) if x_type == "searchspaces": plot_data = plot_data.transpose() # raise NotImplementedError(f"Heatmap has not yet been implemented for {x_type}") elif (x_type == "gpus" and y_type == "applications") or (y_type == "gpus" and x_type == "applications"): - plot_data = plot_data.reshape(len(x_labels), len(y_labels)) + plot_data = plot_data.reshape(len(x_ticks), len(y_ticks)) if x_type == "gpus" and y_type == "applications": plot_data = plot_data.transpose() else: @@ -483,8 +483,8 @@ def __init__( # plot the heatmap axs[0].set_xlabel(label_data[x_type][1]) 
axs[0].set_ylabel(label_data[y_type][1]) - axs[0].set_xticks(ticks=np.arange(len(x_labels)), labels=x_labels, rotation=45) - axs[0].set_yticks(ticks=np.arange(len(y_labels)), labels=y_labels) + axs[0].set_xticks(ticks=np.arange(len(x_ticks)), labels=x_ticks, rotation=45) + axs[0].set_yticks(ticks=np.arange(len(y_ticks)), labels=y_ticks) hm = axs[0].imshow( plot_data, vmin=vmin, vmax=vmax, cmap="RdYlGn", interpolation="nearest", aspect="auto" ) @@ -493,19 +493,20 @@ def __init__( cbar = fig.colorbar(hm) cbar.set_label("Performance relative to baseline (0.0) and optimum (1.0)") - # keep only non-overlapping xticks - if len(x_labels) > 15: - [ - l.set_visible(False) - for (i, l) in enumerate(axs[0].xaxis.get_ticklabels()) - if i % round(len(x_labels) / 15) != 0 - ] - if len(y_labels) > 15: - [ - l.set_visible(False) - for (i, l) in enumerate(axs[0].yaxis.get_ticklabels()) - if i % round(len(y_labels) / 15) != 0 - ] + # keep only non-overlapping ticks + max_ticks = 15 + if len(x_ticks) > max_ticks: + indices = np.linspace(0, len(x_ticks) - 1, max_ticks).round() + hide_tick = np.isin(np.arange(len(x_ticks)), indices, invert=True, assume_unique=True) + for i, t in enumerate(axs[0].xaxis.get_ticklabels()): + if hide_tick[i]: + t.set_visible(False) + if len(y_ticks) > max_ticks: + indices = np.linspace(0, len(y_ticks) - 1, max_ticks).round() + hide_tick = np.isin(np.arange(len(y_ticks)), indices, invert=True, assume_unique=True) + for i, t in enumerate(axs[0].yaxis.get_ticklabels()): + if hide_tick[i]: + t.set_visible(False) # finalize the figure and save or display it fig.tight_layout() From b04657f0367835a6295fd111a9084650148bc7a6 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Tue, 29 Oct 2024 16:04:22 -0700 Subject: [PATCH 060/234] Added script for convertion kernel tuner cache files to T4 format --- .gitignore | 1 + cached_data_used/cachefiles/kerneltuner_to_T4.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 
cached_data_used/cachefiles/kerneltuner_to_T4.py diff --git a/.gitignore b/.gitignore index b55dd8a..adbd8a2 100755 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,7 @@ test_run_experiment/* */setup/*_input.json # exceptions +!cached_data_used/cachefiles/kerneltuner_to_T4.py !cached_data_used/cachefiles/ktt_values_to_kerneltuner.py # ignore specific experiment files diff --git a/cached_data_used/cachefiles/kerneltuner_to_T4.py b/cached_data_used/cachefiles/kerneltuner_to_T4.py new file mode 100644 index 0000000..9865c6c --- /dev/null +++ b/cached_data_used/cachefiles/kerneltuner_to_T4.py @@ -0,0 +1,16 @@ +from pathlib import Path + +from kernel_tuner.cache.cli_tools import convert_t4 + +basepath = Path(__file__).parent +directories = ["convolution_milo", "dedisp_milo", "gemm_milo", "hotspot_milo"] + +for directory in directories: + print(f"Converting files in {directory}") + dirpath = Path(basepath / directory) + assert dirpath.is_dir(), f"Not a directory: {dirpath}" + for infile in dirpath.iterdir(): + if infile.suffix.endswith("json") and not infile.stem.endswith("_T4"): + print(f" | converting {infile.stem}") + outfile = infile.with_stem(infile.stem + "_T4") + convert_t4(infile, outfile) From 2cde8b379041d4a9329f37cf87af4a394bd96532 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Tue, 29 Oct 2024 19:07:05 -0700 Subject: [PATCH 061/234] Created a script for extending a Kernel Tuner cachefile parameters by using a similar complete cachefile to change relatively to --- .gitignore | 3 +- .../extend_kerneltuner_cachefile.py | 108 ++++++++++++++++++ 2 files changed, 109 insertions(+), 2 deletions(-) create mode 100644 cached_data_used/cachefiles/extend_kerneltuner_cachefile.py diff --git a/.gitignore b/.gitignore index adbd8a2..e3eb8c1 100755 --- a/.gitignore +++ b/.gitignore @@ -18,8 +18,7 @@ test_run_experiment/* */setup/*_input.json # exceptions -!cached_data_used/cachefiles/kerneltuner_to_T4.py -!cached_data_used/cachefiles/ktt_values_to_kerneltuner.py 
+!cached_data_used/cachefiles/*.py # ignore specific experiment files experiment_files/milo.json diff --git a/cached_data_used/cachefiles/extend_kerneltuner_cachefile.py b/cached_data_used/cachefiles/extend_kerneltuner_cachefile.py new file mode 100644 index 0000000..ef744b2 --- /dev/null +++ b/cached_data_used/cachefiles/extend_kerneltuner_cachefile.py @@ -0,0 +1,108 @@ +"""Utility script to extend a Kernel Tuner cachefile by using another cachefile that has those parameters. Use with caution.""" + +import json +import numbers +from copy import deepcopy +from itertools import product +from pathlib import Path + +# set the files to use +basepath = Path(__file__).parent +target_infile = basepath / "convolution_milo" / "MI50.json" +target_outfile = basepath / "convolution_milo" / "MI50_extended.json" +extra_sourcefile = basepath / "convolution_milo" / "MI250X.json" + +# load the JSON files +with target_infile.open() as fp: + target: dict = json.load(fp) + new_target = deepcopy(target) +with extra_sourcefile.open() as fp: + extra_source: dict = json.load(fp)["cache"] + +# define the parameters to add, their default value, and their list of values +# caution: order must be the same as in `extra_sourcefile`, `extra_sourcefile` use the superset of parameter values +parameters_to_add = { + "use_shmem": (1, [0, 1]), + "use_cmem": (1, [1]), + "filter_height": (15, [15]), + "filter_width": (15, [15]), +} +default_config_string = ",".join([str(p[0]) for p in parameters_to_add.values()]) + +# add parameters to header +for param, (_, values) in parameters_to_add.items(): + new_target["tune_params_keys"].append(param) + new_target["tune_params"][param] = values + +# add parameters to cache +# caution: does not take restrictions into account +extra_configurations = list(product(*[p[1] for p in parameters_to_add.values()])) +for config_string, base_config in target["cache"].items(): + # lookup the base config in the other cachefile using the defaults + source_base_config: dict = 
extra_source[f"{config_string},{default_config_string}"] + # for each existing config, add as many new configurations as needed by inferring from source + for extra_config in extra_configurations: + extra_config_string = ",".join([str(p) for p in extra_config]) + new_config_string = f"{config_string},{extra_config_string}" + + # lookup the extra config in the other cachefile to use as a basis + try: + source_extra_config: dict = extra_source[new_config_string] + except KeyError: + # as we assume that the extra source is a superset, this config is most likely skipped due to restrictions + continue + new_target_config = deepcopy(source_extra_config) + + # change the values for target based on the relative difference between target, source base and source extra + def change_relatively(target_base, source_base, source_extra): + # check if there are any error values + if isinstance(target_base, str): + return target_base + elif isinstance(source_extra, str): + return source_extra + elif isinstance(source_base, str): + return source_base + # make sure all are the same type + assert type(target_base) == type(source_base) == type(source_extra) + if isinstance(target_base, (list, tuple)): + # if we're dealing with lists, go recursive + assert len(target_base) == len(source_base) == len(source_extra) + return [ + change_relatively(target_base[i], source_base[i], source_extra[i]) for i in range(len(target_base)) + ] + # final check for the type + if not isinstance(target_base, numbers.Real): + raise ValueError( + f"Relative value change is not possible for non-numeric values of type {type(target_base)} ({target_base})" + ) + # since we're dealing with numbers, we can do the relative value change + try: + fraction = source_extra / source_base + return target_base * fraction + except ZeroDivisionError: + return target_base + + # apply the relative value change + for key in [ + "time", + "times", + "compile_time", + "verification_time", + "benchmark_time", + "strategy_time", + 
"framework_time", + "GFLOP/s", + ]: + new_target_config[key] = change_relatively( + base_config[key], source_base_config[key], source_extra_config[key] + ) + + # add the new config to the new target data + new_target["cache"][new_config_string] = new_target_config + + # delete the old config from the new data + del new_target["cache"][config_string] + +# write to the target file +with target_outfile.open("w+") as fp: + json.dump(new_target, fp) From 9fb655e1789c2808d8bb70b0f7da818cc8a8ffe8 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Tue, 29 Oct 2024 20:37:21 -0700 Subject: [PATCH 062/234] Minor improvements to cachefile extension script --- .../cachefiles/extend_kerneltuner_cachefile.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cached_data_used/cachefiles/extend_kerneltuner_cachefile.py b/cached_data_used/cachefiles/extend_kerneltuner_cachefile.py index ef744b2..2c4accd 100644 --- a/cached_data_used/cachefiles/extend_kerneltuner_cachefile.py +++ b/cached_data_used/cachefiles/extend_kerneltuner_cachefile.py @@ -8,7 +8,7 @@ # set the files to use basepath = Path(__file__).parent -target_infile = basepath / "convolution_milo" / "MI50.json" +target_infile = basepath / "convolution_milo" / "MI50_original.json" target_outfile = basepath / "convolution_milo" / "MI50_extended.json" extra_sourcefile = basepath / "convolution_milo" / "MI250X.json" @@ -40,6 +40,10 @@ for config_string, base_config in target["cache"].items(): # lookup the base config in the other cachefile using the defaults source_base_config: dict = extra_source[f"{config_string},{default_config_string}"] + + # delete the old config from the new data + del new_target["cache"][config_string] + # for each existing config, add as many new configurations as needed by inferring from source for extra_config in extra_configurations: extra_config_string = ",".join([str(p) for p in extra_config]) @@ -100,8 +104,10 @@ def change_relatively(target_base, source_base, 
source_extra): # add the new config to the new target data new_target["cache"][new_config_string] = new_target_config - # delete the old config from the new data - del new_target["cache"][config_string] +# check that the extension is succesful +assert len(new_target["cache"]) == len( + extra_source +), f"Lengths don't match; target: {len(new_target['cache'])}, source: {len(extra_source)}" # write to the target file with target_outfile.open("w+") as fp: From 1cb57949fd30e73419ae4e6d4efb106a29c6a15f Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Tue, 29 Oct 2024 23:39:04 -0700 Subject: [PATCH 063/234] Adjusted minimum number of valid evaluations --- src/autotuning_methodology/experiments_defaults.json | 2 +- src/autotuning_methodology/schemas/experiments.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/autotuning_methodology/experiments_defaults.json b/src/autotuning_methodology/experiments_defaults.json index c0c1b41..bd99e75 100644 --- a/src/autotuning_methodology/experiments_defaults.json +++ b/src/autotuning_methodology/experiments_defaults.json @@ -25,7 +25,7 @@ "stochastic": true, "repeats": 25, "samples": 32, - "minimum_number_of_valid_search_iterations": 20, + "minimum_number_of_valid_search_iterations": 10, "ignore_cache": false }, "search_strategies": [ diff --git a/src/autotuning_methodology/schemas/experiments.json b/src/autotuning_methodology/schemas/experiments.json index 442d423..e8b24fd 100755 --- a/src/autotuning_methodology/schemas/experiments.json +++ b/src/autotuning_methodology/schemas/experiments.json @@ -120,7 +120,7 @@ "description": "How many non-error, valid configurations account for a single run of search algorithm", "type": "integer", "minimum": 1, - "default": 20 + "default": 10 }, "ignore_cache": { "description": "If true, always re-run the experiments, even though results from previously executed experiments are stored in run folder.", From 96380a03e36cf0fbb53085f52893436f4cd3de98 Mon Sep 17 
00:00:00 2001 From: Floris-Jan Willemsen Date: Wed, 30 Oct 2024 16:04:41 -0700 Subject: [PATCH 064/234] Heatmap colors are now set to fixed points, making visual comparison possible --- .../visualize_experiments.py | 34 +++++++++++++++---- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 2cf23c1..06e6c46 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -9,7 +9,7 @@ import matplotlib.pyplot as plt import numpy as np from matplotlib.cm import get_cmap -from matplotlib.colors import rgb2hex +from matplotlib.colors import LinearSegmentedColormap, rgb2hex from autotuning_methodology.baseline import ( Baseline, @@ -360,9 +360,17 @@ def __init__( assert len(plot_y_value_types) == 1 x_type = plot_x_value_types[0] y_type = plot_y_value_types[0] - vmin = plot.get("vmin", -0.5) - vmax = plot.get("vmax", 1.0) bins = plot.get("bins", 10) + vmin = plot.get("vmin", -5.0) + vmax = plot.get("vmax", 1.0) + if vmin != -5.0: + warnings.warn( + f"Careful: VMin has been changed from -5.0 to {vmin}. This breaks visual comparison compatiblity with plots that do not have the same VMin." + ) + if vmax != 1.0: + warnings.warn( + f"Careful: VMax has been changed from 1.0 to {vmax}. This breaks visual comparison compatiblity with plots that do not have the same VMax." 
+ ) # collect and plot the data for each search strategy data_collected: dict[str, list[tuple]] = defaultdict(list) @@ -480,14 +488,28 @@ def __init__( if not save_figs: fig.suptitle(title) + # set the colormap + def norm_color_val(v): + """Normalize a color value to fit in the 0-1 range.""" + return (v - vmin) / (vmax - vmin) + + cmap = LinearSegmentedColormap.from_list( + "my_colormap", + [ + (norm_color_val(-5.0), "black"), + (norm_color_val(-3.0), "red"), + (norm_color_val(-1.0), "orange"), + (norm_color_val(0.0), "yellow"), + (norm_color_val(1.0), "green"), + ], + ) + # plot the heatmap axs[0].set_xlabel(label_data[x_type][1]) axs[0].set_ylabel(label_data[y_type][1]) axs[0].set_xticks(ticks=np.arange(len(x_ticks)), labels=x_ticks, rotation=45) axs[0].set_yticks(ticks=np.arange(len(y_ticks)), labels=y_ticks) - hm = axs[0].imshow( - plot_data, vmin=vmin, vmax=vmax, cmap="RdYlGn", interpolation="nearest", aspect="auto" - ) + hm = axs[0].imshow(plot_data, vmin=vmin, vmax=vmax, cmap=cmap, interpolation="nearest", aspect="auto") # plot the colorbar cbar = fig.colorbar(hm) From c7f96c3da9108caacf6e8e67d937e1fd025bd71a Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Wed, 30 Oct 2024 16:49:42 -0700 Subject: [PATCH 065/234] Inverted heatmap axis are now handled correctly --- src/autotuning_methodology/visualize_experiments.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 06e6c46..0ef3589 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -458,14 +458,13 @@ def __init__( x_ticks = label_data[x_type][0] y_ticks = label_data[y_type][0] if (x_type == "time" and y_type == "searchspaces") or (x_type == "searchspaces" and y_type == "time"): - plot_data = np.stack(np.array([t[3] for t in strategy_data])) + plot_data: np.ndarray = np.stack(np.array([t[3] 
for t in strategy_data])) if x_type == "searchspaces": plot_data = plot_data.transpose() - # raise NotImplementedError(f"Heatmap has not yet been implemented for {x_type}") elif (x_type == "gpus" and y_type == "applications") or (y_type == "gpus" and x_type == "applications"): - plot_data = plot_data.reshape(len(x_ticks), len(y_ticks)) - if x_type == "gpus" and y_type == "applications": - plot_data = plot_data.transpose() + plot_data = np.reshape(plot_data, (len(label_data["gpus"][0]), len(label_data["applications"][0]))) + if x_type == "gpus": + plot_data = np.transpose(plot_data) else: raise NotImplementedError( f"Heatmap has not yet been implemented for {x_type}, {y_type}. Submit an issue to request it." From 6d05e935aa860eeb6df1bc30ca9b65bae7dd5d58 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Wed, 30 Oct 2024 16:50:21 -0700 Subject: [PATCH 066/234] Added and updated experiment files for hyperparameter tuning comparison --- experiment_files/compare_hypertuners.json | 2 +- .../compare_hypertuners_milo.json | 186 ++++++++++++++++++ 2 files changed, 187 insertions(+), 1 deletion(-) create mode 100644 experiment_files/compare_hypertuners_milo.json diff --git a/experiment_files/compare_hypertuners.json b/experiment_files/compare_hypertuners.json index d68c0b7..e1108af 100644 --- a/experiment_files/compare_hypertuners.json +++ b/experiment_files/compare_hypertuners.json @@ -1,5 +1,5 @@ { - "version": "1.0.0", + "version": "1.1.0", "name": "Compare hyperparameter tuning", "parent_folder": "./hyperparametertuning", "experimental_groups_defaults": { diff --git a/experiment_files/compare_hypertuners_milo.json b/experiment_files/compare_hypertuners_milo.json new file mode 100644 index 0000000..ea7f8cd --- /dev/null +++ b/experiment_files/compare_hypertuners_milo.json @@ -0,0 +1,186 @@ +{ + "version": "1.1.0", + "name": "Compare hyperparameter tuning", + "parent_folder": "./hyperparametertuning_milo", + "experimental_groups_defaults": { + "applications": [ + { 
+ "name": "dedispersion_milo", + "folder": "../autotuning_methodology/cached_data_used/kernels", + "input_file": "dedispersion_milo.json" + }, + { + "name": "convolution_milo", + "folder": "../autotuning_methodology/cached_data_used/kernels", + "input_file": "convolution_milo.json" + } + ], + "gpus": [ + "A100", + "A4000", + "MI250X", + "W6600" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./cached_data_used/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 50, + "samples": 32, + "minimum_number_of_valid_search_iterations": 10, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "pso_default", + "search_method": "pso", + "display_name": "PSO default", + "autotuner": "KernelTuner", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 20 + }, + { + "name": "maxiter", + "value": 100 + }, + { + "name": "w", + "value": 0.5 + }, + { + "name": "c1", + "value": 2.0 + }, + { + "name": "c2", + "value": 1.0 + } + ] + }, + { + "name": "pso_tuned", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 30 + }, + { + "name": "maxiter", + "value": 50 + }, + { + "name": "w", + "value": 0.25 + }, + { + "name": "c1", + "value": 2.0 + }, + { + "name": "c2", + "value": 1.5 + } + ], + "display_name": "PSO tuned", + "autotuner": "KernelTuner" + }, + { + "name": "pso_tuned_inverse", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 30 + }, + { + "name": "maxiter", + "value": 100 + }, + { + "name": "w", + "value": 0.75 + }, + { + "name": "c1", + "value": 3.0 + }, + { + "name": "c2", + "value": 1.5 + } + ], + "display_name": "PSO tuned inv.", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.96, + "cutoff_percentile_start": 0.5, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "time" + ] + }, + 
"visualization_settings": { + "plots": [ + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ] + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "searchspaces" + ], + "y_axis_value_types": [ + "time" + ], + "bins": 100 + }, + { + "scope": "aggregate", + "style": "line" + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file From 743482e1ba8202b296f8a6a7776406e878d84c92 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 1 Nov 2024 18:34:06 -0700 Subject: [PATCH 067/234] Added a try-catch system to re-execute experiment caches that did not have enough overlap --- src/autotuning_methodology/caching.py | 8 +++ src/autotuning_methodology/curves.py | 7 ++- src/autotuning_methodology/experiments.py | 2 +- .../report_experiments.py | 60 ++++++++++++------- 4 files changed, 52 insertions(+), 25 deletions(-) diff --git a/src/autotuning_methodology/caching.py b/src/autotuning_methodology/caching.py index 622af19..aeabd46 100755 --- a/src/autotuning_methodology/caching.py +++ b/src/autotuning_methodology/caching.py @@ -187,3 +187,11 @@ def get_results(self) -> Results: def has_results(self) -> bool: """Checks whether there are results or the file exists.""" return self.__stored or self.__check_for_file() + + def delete(self) -> bool: + """Deletes the file if it exists, returns true if succesfully deleted.""" + fp = self.__get_cache_full_filepath() + if fp.exists() and fp.is_file(): + fp.unlink() + return True + return False diff --git 
a/src/autotuning_methodology/curves.py b/src/autotuning_methodology/curves.py index df5a711..06035d2 100644 --- a/src/autotuning_methodology/curves.py +++ b/src/autotuning_methodology/curves.py @@ -718,9 +718,10 @@ def _get_curve_over_time_values_in_range( time_range_start = time_range[0] * (1 - time_range_margin) time_range_end = time_range[-1] * (1 + time_range_margin) range_mask_margin = (time_range_start <= times) & (times <= time_range_end) - assert np.all( - np.count_nonzero(range_mask_margin, axis=0) > 1 - ), f"Not enough overlap in time range and time values: should be {time_range_start=} <= {times} <= {time_range_end=}" + + # make sure there is enough overlap in the time ranges + if not np.all(np.count_nonzero(range_mask_margin, axis=0) > 1): + raise ValueError(f"Not enough overlap in time range and time values: should be {time_range_start=} <= {times} <= {time_range_end=}", self.name, self.application_name, self.device_name) times = np.where(range_mask_margin, times, np.nan) values = np.where(range_mask_margin, values, np.nan) num_repeats = values.shape[1] diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 4baf2eb..002f4df 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -406,7 +406,7 @@ def generate_experiment_file( return experiment_file_path.resolve() -def execute_experiment(filepath: str, profiling: bool = False) -> tuple[dict, list, dict, dict]: +def execute_experiment(filepath: str, profiling: bool = False): """Executes the experiment by retrieving it from the cache or running it. 
Args: diff --git a/src/autotuning_methodology/report_experiments.py b/src/autotuning_methodology/report_experiments.py index 101b17b..2e14616 100644 --- a/src/autotuning_methodology/report_experiments.py +++ b/src/autotuning_methodology/report_experiments.py @@ -199,9 +199,9 @@ def get_strategy_scores(experiment_filepath: str, use_strategy_as_baseline=None) experiment, strategies, searchspace_statistics, results_descriptions = execute_experiment( experiment_filepath, profiling=False ) - experiment_folderpath = experiment["parent_folder_absolute_path"] # get the settings + experiment_folderpath = experiment["parent_folder_absolute_path"] minimization: bool = experiment["statistics_settings"]["minimization"] cutoff_percentile: float = experiment["statistics_settings"]["cutoff_percentile"] cutoff_percentile_start: float = experiment["statistics_settings"]["cutoff_percentile_start"] @@ -209,27 +209,45 @@ def get_strategy_scores(experiment_filepath: str, use_strategy_as_baseline=None) confidence_level: float = experiment["visualization_settings"]["confidence_level"] # aggregate the data - aggregation_data = get_aggregation_data( - experiment_folderpath, - experiment, - searchspace_statistics, - strategies, - results_descriptions, - cutoff_percentile, - cutoff_percentile_start, - confidence_level, - minimization, - time_resolution, - use_strategy_as_baseline, - ) + def get_agg_data(): + return get_aggregation_data( + experiment_folderpath, + experiment, + searchspace_statistics, + strategies, + results_descriptions, + cutoff_percentile, + cutoff_percentile_start, + confidence_level, + minimization, + time_resolution, + use_strategy_as_baseline, + ) - # get the aggregated performance per strategy - ( - strategies_performance, - strategies_lower_err, - strategies_upper_err, - strategies_real_stopping_point_fraction, - ) = get_strategies_aggregated_performance(list(aggregation_data.values()), confidence_level) + try: + # get the aggregated performance per strategy + 
aggregation_data = get_agg_data() + strategies_performance, _, _, _ = get_strategies_aggregated_performance( + list(aggregation_data.values()), confidence_level + ) + except AssertionError as e: + if "Not enough overlap in time range and time values" in str(e.args[0]): + # delete the broken cachefile + _, strategy_name, application_name, device_name = e.args + assert results_descriptions[device_name][application_name][ + strategy_name + ].delete(), "Failed to delete cachefile" + + # re-execute the experiment and recollect the data to see if the issue is resolved + experiment, strategies, searchspace_statistics, results_descriptions = execute_experiment( + experiment_filepath, profiling=False + ) + aggregation_data = get_agg_data() + strategies_performance, _, _, _ = get_strategies_aggregated_performance( + list(aggregation_data.values()), confidence_level + ) + else: + raise e # calculate the average performance score and error per strategy results: dict[str, dict[str, float]] = dict() From 81bc0924977ea587a94e2ea3c368c612992e65e9 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Sun, 3 Nov 2024 15:03:34 -0800 Subject: [PATCH 068/234] Changed error type --- src/autotuning_methodology/report_experiments.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/autotuning_methodology/report_experiments.py b/src/autotuning_methodology/report_experiments.py index 2e14616..248624e 100644 --- a/src/autotuning_methodology/report_experiments.py +++ b/src/autotuning_methodology/report_experiments.py @@ -230,7 +230,7 @@ def get_agg_data(): strategies_performance, _, _, _ = get_strategies_aggregated_performance( list(aggregation_data.values()), confidence_level ) - except AssertionError as e: + except ValueError as e: if "Not enough overlap in time range and time values" in str(e.args[0]): # delete the broken cachefile _, strategy_name, application_name, device_name = e.args From 0bcfaf63ac66ad4d54fc2c66df91bd97c875b972 Mon Sep 17 00:00:00 2001 From: 
Floris-Jan Willemsen Date: Wed, 6 Nov 2024 10:10:52 -0800 Subject: [PATCH 069/234] Updated comparison of hypertuned algorithms --- .../compare_hypertuners_milo.json | 111 +++++++++++++++++- 1 file changed, 107 insertions(+), 4 deletions(-) diff --git a/experiment_files/compare_hypertuners_milo.json b/experiment_files/compare_hypertuners_milo.json index ea7f8cd..c5e5b6a 100644 --- a/experiment_files/compare_hypertuners_milo.json +++ b/experiment_files/compare_hypertuners_milo.json @@ -85,7 +85,8 @@ } ], "display_name": "PSO tuned", - "autotuner": "KernelTuner" + "autotuner": "KernelTuner", + "color_parent": "pso_default" }, { "name": "pso_tuned_inverse", @@ -106,14 +107,116 @@ { "name": "c1", "value": 3.0 + } + ], + "display_name": "PSO tuned inv.", + "autotuner": "KernelTuner", + "color_parent": "pso_default" + }, + { + "name": "greedy_ils_default", + "search_method": "greedy_ils", + "search_method_hyperparameters": [ + { + "name": "neighbor", + "value": "Hamming" }, { - "name": "c2", - "value": 1.5 + "name": "restart", + "value": true + }, + { + "name": "no_improvement", + "value": 50 + }, + { + "name": "random_walk", + "value": 0.3 } ], - "display_name": "PSO tuned inv.", + "display_name": "Greedy ILS default", + "autotuner": "KernelTuner" + }, + { + "name": "greedy_ils_tuned", + "search_method": "greedy_ils", + "search_method_hyperparameters": [ + { + "name": "neighbor", + "value": "Hamming" + }, + { + "name": "restart", + "value": true + }, + { + "name": "no_improvement", + "value": 66 + }, + { + "name": "random_walk", + "value": 0.9 + } + ], + "display_name": "Greedy ILS tuned", + "autotuner": "KernelTuner", + "color_parent": "greedy_ils_default" + }, + { + "name": "greedy_ils_tuned_inverse", + "search_method": "greedy_ils", + "search_method_hyperparameters": [ + { + "name": "neighbor", + "value": "adjacent" + }, + { + "name": "restart", + "value": false + }, + { + "name": "no_improvement", + "value": 66 + }, + { + "name": "random_walk", + "value": 0.9 + } + 
], + "display_name": "Greedy ILS tuned inv.", + "autotuner": "KernelTuner", + "color_parent": "greedy_ils_default" + }, + { + "name": "bayes_opt", + "search_method": "bayes_opt", + "display_name": "Bayesian Optimization", + "autotuner": "KernelTuner" + }, + { + "name": "genetic_algorithm", + "search_method": "genetic_algorithm", + "display_name": "Genetic Algorithm", + "autotuner": "KernelTuner" + }, + { + "name": "dual_annealing", + "search_method": "dual_annealing", + "display_name": "Dual Annealing", "autotuner": "KernelTuner" + }, + { + "name": "dual_annealing_tuned", + "search_method": "dual_annealing", + "search_method_hyperparameters": [ + { + "name": "method", + "value": "CG" + } + ], + "display_name": "Dual Annealing tuned", + "autotuner": "KernelTuner", + "color_parent": "dual_annealing" } ], "statistics_settings": { From 6edcbb53ced924064616d2f6de4436adb1773797 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Wed, 6 Nov 2024 10:11:19 -0800 Subject: [PATCH 070/234] Extended VMin and colorbar range --- src/autotuning_methodology/visualize_experiments.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 0ef3589..7637799 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -361,11 +361,11 @@ def __init__( x_type = plot_x_value_types[0] y_type = plot_y_value_types[0] bins = plot.get("bins", 10) - vmin = plot.get("vmin", -5.0) + vmin = plot.get("vmin", -10.0) vmax = plot.get("vmax", 1.0) - if vmin != -5.0: + if vmin != -10.0: warnings.warn( - f"Careful: VMin has been changed from -5.0 to {vmin}. This breaks visual comparison compatiblity with plots that do not have the same VMin." + f"Careful: VMin has been changed from -10.0 to {vmin}. This breaks visual comparison compatiblity with plots that do not have the same VMin." 
) if vmax != 1.0: warnings.warn( @@ -495,8 +495,8 @@ def norm_color_val(v): cmap = LinearSegmentedColormap.from_list( "my_colormap", [ - (norm_color_val(-5.0), "black"), - (norm_color_val(-3.0), "red"), + (norm_color_val(-10.0), "black"), + (norm_color_val(-4.0), "red"), (norm_color_val(-1.0), "orange"), (norm_color_val(0.0), "yellow"), (norm_color_val(1.0), "green"), From f0fe17c2736077f4a52de6e69bd24d4c6ef3b5bf Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 7 Nov 2024 10:35:20 -0800 Subject: [PATCH 071/234] Added meta comparison of optimization algorithms for hyperparameter tuning --- .gitignore | 1 + experiment_files/compare_meta_algorithms.json | 99 +++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 experiment_files/compare_meta_algorithms.json diff --git a/.gitignore b/.gitignore index e3eb8c1..92749ae 100755 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ cached_data_used/last_run/* cached_data_used/import_runs/* methodology_paper_evaluation/run/* hyperparametertuning/* +hyperparametertuning_*/* test_run_experiment/* # ignore setup files diff --git a/experiment_files/compare_meta_algorithms.json b/experiment_files/compare_meta_algorithms.json new file mode 100644 index 0000000..577f662 --- /dev/null +++ b/experiment_files/compare_meta_algorithms.json @@ -0,0 +1,99 @@ +{ + "version": "1.1.0", + "name": "Compare hyperparameter tuning meta algorithms", + "parent_folder": "./hyperparametertuning_meta", + "experimental_groups_defaults": { + "applications": [ + { + "name": "hyperparamtuning_pso", + "folder": "../autotuning_methodology/cached_data_used/kernels", + "input_file": "hyperparamtuning_pso.json" + } + ], + "gpus": [ + "arm" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./cached_data_used/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 50, + "samples": 32, + "minimum_number_of_valid_search_iterations": 2, + "ignore_cache": false + }, + "search_strategies": [ + { 
+ "name": "pso", + "search_method": "pso", + "display_name": "PSO", + "autotuner": "KernelTuner" + }, + { + "name": "greedy_ils", + "search_method": "greedy_ils", + "display_name": "Greedy ILS", + "autotuner": "KernelTuner" + }, + { + "name": "genetic_algorithm", + "search_method": "genetic_algorithm", + "display_name": "Genetic Algorithm", + "autotuner": "KernelTuner" + }, + { + "name": "dual_annealing", + "search_method": "dual_annealing", + "display_name": "Dual Annealing", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "minimization": false, + "cutoff_percentile": 0.96, + "cutoff_percentile_start": 0.5, + "cutoff_type": "fevals", + "objective_time_keys": [ + "compilation", + "framework", + "search_algorithm", + "validation" + ], + "objective_performance_keys": [ + "score" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ] + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "searchspaces" + ], + "y_axis_value_types": [ + "time" + ], + "bins": 100 + }, + { + "scope": "aggregate", + "style": "line" + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file From 9d28395784a9bc82e202d7310f44f4f1e2eb856b Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 7 Nov 2024 10:36:19 -0800 Subject: [PATCH 072/234] Implemented support for different objectives and optimization direction --- src/autotuning_methodology/curves.py | 7 +++++- src/autotuning_methodology/runner.py | 26 +++++++++++++++++----- src/autotuning_methodology/schemas/T4.json | 3 ++- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/autotuning_methodology/curves.py b/src/autotuning_methodology/curves.py index 06035d2..f3b318a 100644 --- a/src/autotuning_methodology/curves.py +++ 
b/src/autotuning_methodology/curves.py @@ -721,7 +721,12 @@ def _get_curve_over_time_values_in_range( # make sure there is enough overlap in the time ranges if not np.all(np.count_nonzero(range_mask_margin, axis=0) > 1): - raise ValueError(f"Not enough overlap in time range and time values: should be {time_range_start=} <= {times} <= {time_range_end=}", self.name, self.application_name, self.device_name) + raise ValueError( + f"Not enough overlap in time range and time values: should be {time_range_start=} <= {times} <= {time_range_end=}", + self.name, + self.application_name, + self.device_name, + ) times = np.where(range_mask_margin, times, np.nan) values = np.where(range_mask_margin, values, np.nan) num_repeats = values.shape[1] diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 8e70614..86512a1 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -108,7 +108,8 @@ def tune( application_name: str, device_name: str, group: dict, - tune_options: dict, # TODO check if still necessary when we have input json file + objective: str, + objective_higher_is_better: bool, profiling: bool, searchspace_stats: SearchspaceStatistics, ) -> tuple[list, list, int]: @@ -121,7 +122,8 @@ def tune( application_name: the name of the program to tune. device_name: the device (GPU) to tune on. group: the experimental group (usually the search method). - tune_options: a special options dictionary passed along to the autotuning framework. + objective: the key to optimize for. + objective_higher_is_better: whether to maximize or minimize the objective. profiling: whether profiling statistics should be collected. searchspace_stats: a ``SearchspaceStatistics`` object passed to convert imported runs. 
@@ -141,7 +143,13 @@ def tune_with_kerneltuner(): with warnings.catch_warnings(): warnings.simplefilter("ignore") metadata, results = tune_kernel_T1( - input_file, simulation_mode=True, output_T4=True, iterations=samples, strategy_options=group["budget"] + input_file, + objective=objective, + objective_higher_is_better=objective_higher_is_better, + simulation_mode=True, + output_T4=True, + iterations=samples, + strategy_options=group["budget"], ) if "max_fevals" in group["budget"]: max_fevals = group["budget"]["max_fevals"] @@ -215,8 +223,13 @@ def collect_results( The ``ResultsDescription`` object with the results. """ min_num_evals: int = group["minimum_number_of_valid_search_iterations"] - # TODO put the tune options in the .json in strategy_defaults? Make it Kernel Tuner independent - tune_options = {"verbose": False, "quiet": True, "simulation_mode": True} + + if len(results_description.objective_performance_keys) != 1: + raise NotImplementedError( + f"Multi objective tuning is not yet supported ({results_description.objective_performance_keys})" + ) + objective = results_description.objective_performance_keys[0] + objective_higher_is_better = not results_description.minimization def report_multiple_attempts(rep: int, len_res: int, group_repeats: int, attempt: int): """If multiple attempts are necessary, report the reason.""" @@ -258,7 +271,8 @@ def report_multiple_attempts(rep: int, len_res: int, group_repeats: int, attempt results_description.application_name, results_description.device_name, group, - tune_options, + objective, + objective_higher_is_better, profiling, searchspace_stats, ) diff --git a/src/autotuning_methodology/schemas/T4.json b/src/autotuning_methodology/schemas/T4.json index f76b102..82db64e 100644 --- a/src/autotuning_methodology/schemas/T4.json +++ b/src/autotuning_methodology/schemas/T4.json @@ -68,7 +68,8 @@ "value": { "type": [ "number", - "string" + "string", + "array" ] }, "unit": { From ff75788c925ac444d8fad699d8389e337fe6b182 Mon 
Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Tue, 19 Nov 2024 18:19:21 -0800 Subject: [PATCH 073/234] Comparison of various BO methods --- .../compare_hypertuners_milo.json | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/experiment_files/compare_hypertuners_milo.json b/experiment_files/compare_hypertuners_milo.json index c5e5b6a..b320f02 100644 --- a/experiment_files/compare_hypertuners_milo.json +++ b/experiment_files/compare_hypertuners_milo.json @@ -18,8 +18,7 @@ "gpus": [ "A100", "A4000", - "MI250X", - "W6600" + "MI250X" ], "pattern_for_full_search_space_filenames": { "regex": "./cached_data_used/cachefiles/${applications}/${gpus}_T4.json" @@ -190,9 +189,30 @@ { "name": "bayes_opt", "search_method": "bayes_opt", - "display_name": "Bayesian Optimization", + "display_name": "Bayesian Optimization SciKit", "autotuner": "KernelTuner" }, + { + "name": "bayes_opt_botorch", + "search_method": "bayes_opt_BOTorch", + "display_name": "Bayesian Optimization BOTorch", + "autotuner": "KernelTuner", + "color_parent": "bayes_opt" + }, + { + "name": "bayes_opt_botorch_2", + "search_method": "bayes_opt_BOTorch", + "display_name": "Bayesian Optimization BOTorch 2", + "autotuner": "KernelTuner", + "color_parent": "bayes_opt" + }, + { + "name": "bayes_opt_botorch_3", + "search_method": "bayes_opt_BOTorch", + "display_name": "Bayesian Optimization BOTorch 3", + "autotuner": "KernelTuner", + "color_parent": "bayes_opt" + }, { "name": "genetic_algorithm", "search_method": "genetic_algorithm", From ef72b3bea8b4c180ea09c119a9efb08df12f9fa6 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Wed, 5 Mar 2025 21:59:15 +0100 Subject: [PATCH 074/234] Added comparison of hypertuners for paper --- .../compare_hypertuners_milo.json | 125 +------- .../compare_hypertuners_paper.json | 287 ++++++++++++++++++ 2 files changed, 294 insertions(+), 118 deletions(-) create mode 100644 experiment_files/compare_hypertuners_paper.json diff --git 
a/experiment_files/compare_hypertuners_milo.json b/experiment_files/compare_hypertuners_milo.json index b320f02..2025ee3 100644 --- a/experiment_files/compare_hypertuners_milo.json +++ b/experiment_files/compare_hypertuners_milo.json @@ -58,60 +58,6 @@ } ] }, - { - "name": "pso_tuned", - "search_method": "pso", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 30 - }, - { - "name": "maxiter", - "value": 50 - }, - { - "name": "w", - "value": 0.25 - }, - { - "name": "c1", - "value": 2.0 - }, - { - "name": "c2", - "value": 1.5 - } - ], - "display_name": "PSO tuned", - "autotuner": "KernelTuner", - "color_parent": "pso_default" - }, - { - "name": "pso_tuned_inverse", - "search_method": "pso", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 30 - }, - { - "name": "maxiter", - "value": 100 - }, - { - "name": "w", - "value": 0.75 - }, - { - "name": "c1", - "value": 3.0 - } - ], - "display_name": "PSO tuned inv.", - "autotuner": "KernelTuner", - "color_parent": "pso_default" - }, { "name": "greedy_ils_default", "search_method": "greedy_ils", @@ -136,56 +82,6 @@ "display_name": "Greedy ILS default", "autotuner": "KernelTuner" }, - { - "name": "greedy_ils_tuned", - "search_method": "greedy_ils", - "search_method_hyperparameters": [ - { - "name": "neighbor", - "value": "Hamming" - }, - { - "name": "restart", - "value": true - }, - { - "name": "no_improvement", - "value": 66 - }, - { - "name": "random_walk", - "value": 0.9 - } - ], - "display_name": "Greedy ILS tuned", - "autotuner": "KernelTuner", - "color_parent": "greedy_ils_default" - }, - { - "name": "greedy_ils_tuned_inverse", - "search_method": "greedy_ils", - "search_method_hyperparameters": [ - { - "name": "neighbor", - "value": "adjacent" - }, - { - "name": "restart", - "value": false - }, - { - "name": "no_improvement", - "value": 66 - }, - { - "name": "random_walk", - "value": 0.9 - } - ], - "display_name": "Greedy ILS tuned inv.", - "autotuner": "KernelTuner", - 
"color_parent": "greedy_ils_default" - }, { "name": "bayes_opt", "search_method": "bayes_opt", @@ -193,23 +89,16 @@ "autotuner": "KernelTuner" }, { - "name": "bayes_opt_botorch", + "name": "bayes_opt_botorch_3", "search_method": "bayes_opt_BOTorch", "display_name": "Bayesian Optimization BOTorch", "autotuner": "KernelTuner", "color_parent": "bayes_opt" }, { - "name": "bayes_opt_botorch_2", - "search_method": "bayes_opt_BOTorch", - "display_name": "Bayesian Optimization BOTorch 2", - "autotuner": "KernelTuner", - "color_parent": "bayes_opt" - }, - { - "name": "bayes_opt_botorch_3", - "search_method": "bayes_opt_BOTorch", - "display_name": "Bayesian Optimization BOTorch 3", + "name": "bayes_opt_botorch_transfer_weighted", + "search_method": "bayes_opt_BOTorch_transfer_weighted", + "display_name": "Bayesian Optimization BOTorch TL", "autotuner": "KernelTuner", "color_parent": "bayes_opt" }, @@ -222,7 +111,7 @@ { "name": "dual_annealing", "search_method": "dual_annealing", - "display_name": "Dual Annealing", + "display_name": "Dual Annealing default", "autotuner": "KernelTuner" }, { @@ -289,10 +178,10 @@ "scope": "search_strategy", "style": "heatmap", "x_axis_value_types": [ - "searchspaces" + "time" ], "y_axis_value_types": [ - "time" + "searchspaces" ], "bins": 100 }, diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json new file mode 100644 index 0000000..60b0ad8 --- /dev/null +++ b/experiment_files/compare_hypertuners_paper.json @@ -0,0 +1,287 @@ +{ + "version": "1.1.0", + "name": "Compare hyperparameter tuning", + "parent_folder": "./hyperparametertuning_milo", + "experimental_groups_defaults": { + "applications": [ + { + "name": "dedispersion_milo", + "folder": "../autotuning_methodology/cached_data_used/kernels", + "input_file": "dedispersion_milo.json" + }, + { + "name": "convolution_milo", + "folder": "../autotuning_methodology/cached_data_used/kernels", + "input_file": "convolution_milo.json" + }, + { + 
"name": "hotspot_milo", + "folder": "../autotuning_methodology/cached_data_used/kernels", + "input_file": "hotspot_milo.json" + } + ], + "gpus": [ + "A100", + "A4000", + "MI250X" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./cached_data_used/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 50, + "samples": 32, + "minimum_number_of_valid_search_iterations": 10, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "pso_default", + "search_method": "pso", + "display_name": "PSO default", + "autotuner": "KernelTuner", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 20 + }, + { + "name": "maxiter", + "value": 100 + }, + { + "name": "w", + "value": 0.5 + }, + { + "name": "c1", + "value": 2.0 + }, + { + "name": "c2", + "value": 1.0 + } + ] + }, + { + "name": "pso_tuned", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 30 + }, + { + "name": "maxiter", + "value": 50 + }, + { + "name": "w", + "value": 0.25 + }, + { + "name": "c1", + "value": 2.0 + }, + { + "name": "c2", + "value": 1.5 + } + ], + "display_name": "PSO tuned", + "autotuner": "KernelTuner", + "color_parent": "pso_default" + }, + { + "name": "pso_tuned_inverse", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 30 + }, + { + "name": "maxiter", + "value": 100 + }, + { + "name": "w", + "value": 0.75 + }, + { + "name": "c1", + "value": 3.0 + } + ], + "display_name": "PSO tuned inv.", + "autotuner": "KernelTuner", + "color_parent": "pso_default" + }, + { + "name": "greedy_ils_default", + "search_method": "greedy_ils", + "search_method_hyperparameters": [ + { + "name": "neighbor", + "value": "Hamming" + }, + { + "name": "restart", + "value": true + }, + { + "name": "no_improvement", + "value": 50 + }, + { + "name": "random_walk", + "value": 0.3 + } + ], + "display_name": "Greedy ILS default", + "autotuner": "KernelTuner" + }, 
+ { + "name": "greedy_ils_tuned", + "search_method": "greedy_ils", + "search_method_hyperparameters": [ + { + "name": "neighbor", + "value": "Hamming" + }, + { + "name": "restart", + "value": true + }, + { + "name": "no_improvement", + "value": 66 + }, + { + "name": "random_walk", + "value": 0.9 + } + ], + "display_name": "Greedy ILS tuned", + "autotuner": "KernelTuner", + "color_parent": "greedy_ils_default" + }, + { + "name": "greedy_ils_tuned_inverse", + "search_method": "greedy_ils", + "search_method_hyperparameters": [ + { + "name": "neighbor", + "value": "adjacent" + }, + { + "name": "restart", + "value": false + }, + { + "name": "no_improvement", + "value": 66 + }, + { + "name": "random_walk", + "value": 0.9 + } + ], + "display_name": "Greedy ILS tuned inv.", + "autotuner": "KernelTuner", + "color_parent": "greedy_ils_default" + }, + { + "name": "genetic_algorithm", + "search_method": "genetic_algorithm", + "display_name": "Genetic Algorithm", + "autotuner": "KernelTuner" + }, + { + "name": "dual_annealing", + "search_method": "dual_annealing", + "display_name": "Dual Annealing", + "autotuner": "KernelTuner" + }, + { + "name": "dual_annealing_tuned", + "search_method": "dual_annealing", + "search_method_hyperparameters": [ + { + "name": "method", + "value": "CG" + } + ], + "display_name": "Dual Annealing tuned", + "autotuner": "KernelTuner", + "color_parent": "dual_annealing" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.96, + "cutoff_percentile_start": 0.5, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "time" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, 
+ { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ] + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "searchspaces" + ], + "bins": 100 + }, + { + "scope": "aggregate", + "style": "line" + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file From 57661474c9878fbd39321fab324471384003975e Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Wed, 5 Mar 2025 22:00:05 +0100 Subject: [PATCH 075/234] Added automatic conversion of runtimes to measurement if measurement is missing --- .../searchspace_statistics.py | 48 ++++++++++++++----- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index aff4a03..6fee150 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -59,22 +59,43 @@ def is_not_invalid_value(value, performance: bool) -> bool: return not invalid_check_function(value) -def to_valid_array(results: list[dict], key: str, performance: bool, from_time_unit: str = None) -> np.ndarray: - """Convert results performance or time values to a numpy array, sum if the input is a list of arrays.""" +def to_valid_array( + results: list[dict], + key: str, + performance: bool, + from_time_unit: str = None, + replace_missing_measurement_from_times_key: str = None, +) -> np.ndarray: + """Convert results performance or time values to a numpy array, sum if the input is a list of arrays. + + replace_missing_measurement_from_times_key: if key is missing from measurements, use the mean value from times. 
+ """ # make a list of all valid values if performance: values = list() for r in results: - for m in r["measurements"]: + val = None + # get the performance value from the measurements + measurements = list(filter(lambda m: m["name"] == key, r["measurements"])) + if len(measurements) == 0: + if replace_missing_measurement_from_times_key is not None: + val = np.mean(r["times"][replace_missing_measurement_from_times_key]) + else: + raise ValueError(f"Measurement with name {key} not found in {r["measurements"]}") + if len(measurements) == 1: + m = measurements[0] if key == m["name"]: val = m["value"] - if is_not_invalid_value(val, performance): - # performance should not be auto-converted - # if len(m["unit"]) > 0: - # val = convert_from_time_unit(val, m["unit"]) - values.append(val) - else: - values.append(np.nan) + elif len(measurements) > 1: + raise ValueError(f"Multiple measurements with the same name {key} found in results") + # register the value + if is_not_invalid_value(val, performance): + # performance should not be auto-converted + # if len(m["unit"]) > 0: + # val = convert_from_time_unit(val, m["unit"]) + values.append(val) + else: + values.append(np.nan) else: values = list( ( @@ -359,7 +380,12 @@ def _load(self) -> bool: # get the performance values per configuration self.objective_performances = dict() for key in self.objective_performance_keys: - self.objective_performances[key] = to_valid_array(results, key, performance=True) + self.objective_performances[key] = to_valid_array( + results, + key, + performance=True, + replace_missing_measurement_from_times_key="runtimes" if key == "time" else None, + ) assert ( self.objective_performances[key].ndim == 1 ), f"Should have one dimension, has {self.objective_performances[key].ndim}" From 50a464b53c68b996867ada5c2ef3ee1ac3bfdcee Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 6 Mar 2025 17:46:23 +0100 Subject: [PATCH 076/234] Added GEMM to hypertuning comparison --- 
experiment_files/compare_hypertuners_paper.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 60b0ad8..75e7895 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -18,6 +18,11 @@ "name": "hotspot_milo", "folder": "../autotuning_methodology/cached_data_used/kernels", "input_file": "hotspot_milo.json" + }, + { + "name": "gemm_milo", + "folder": "../autotuning_methodology/cached_data_used/kernels", + "input_file": "gemm_milo.json" } ], "gpus": [ From ab937b70ae1155fbb7a6736f70481f10e56af6bf Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 6 Mar 2025 23:56:35 +0100 Subject: [PATCH 077/234] Added benchmark_hub repository as submodule --- .gitmodules | 3 +++ benchmark_hub | 1 + 2 files changed, 4 insertions(+) create mode 100644 .gitmodules create mode 160000 benchmark_hub diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..19bab30 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "benchmark_hub"] + path = benchmark_hub + url = https://github.com/AutoTuningAssociation/benchmark_hub.git diff --git a/benchmark_hub b/benchmark_hub new file mode 160000 index 0000000..08cdf33 --- /dev/null +++ b/benchmark_hub @@ -0,0 +1 @@ +Subproject commit 08cdf33d59c704227eb44c7279b6ea1ea70c50f4 From fd709e207d1429b9e4f712429ef2b333297cb55b Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 7 Mar 2025 08:43:09 +0100 Subject: [PATCH 078/234] Implemented repository of caches, kernels and experiment files --- benchmark_hub | 2 +- .../extend_kerneltuner_cachefile.py | 114 ----------------- .../cachefiles/kerneltuner_to_T4.py | 16 --- .../cachefiles/ktt_values_to_kerneltuner.py | 116 ------------------ .../compare_hypertuners_paper.json | 10 +- 5 files changed, 6 insertions(+), 252 deletions(-) delete mode 100644 
cached_data_used/cachefiles/extend_kerneltuner_cachefile.py delete mode 100644 cached_data_used/cachefiles/kerneltuner_to_T4.py delete mode 100644 cached_data_used/cachefiles/ktt_values_to_kerneltuner.py diff --git a/benchmark_hub b/benchmark_hub index 08cdf33..fc5dec0 160000 --- a/benchmark_hub +++ b/benchmark_hub @@ -1 +1 @@ -Subproject commit 08cdf33d59c704227eb44c7279b6ea1ea70c50f4 +Subproject commit fc5dec06ecf81dd1222606321f88aca9f7d47918 diff --git a/cached_data_used/cachefiles/extend_kerneltuner_cachefile.py b/cached_data_used/cachefiles/extend_kerneltuner_cachefile.py deleted file mode 100644 index 2c4accd..0000000 --- a/cached_data_used/cachefiles/extend_kerneltuner_cachefile.py +++ /dev/null @@ -1,114 +0,0 @@ -"""Utility script to extend a Kernel Tuner cachefile by using another cachefile that has those parameters. Use with caution.""" - -import json -import numbers -from copy import deepcopy -from itertools import product -from pathlib import Path - -# set the files to use -basepath = Path(__file__).parent -target_infile = basepath / "convolution_milo" / "MI50_original.json" -target_outfile = basepath / "convolution_milo" / "MI50_extended.json" -extra_sourcefile = basepath / "convolution_milo" / "MI250X.json" - -# load the JSON files -with target_infile.open() as fp: - target: dict = json.load(fp) - new_target = deepcopy(target) -with extra_sourcefile.open() as fp: - extra_source: dict = json.load(fp)["cache"] - -# define the parameters to add, their default value, and their list of values -# caution: order must be the same as in `extra_sourcefile`, `extra_sourcefile` use the superset of parameter values -parameters_to_add = { - "use_shmem": (1, [0, 1]), - "use_cmem": (1, [1]), - "filter_height": (15, [15]), - "filter_width": (15, [15]), -} -default_config_string = ",".join([str(p[0]) for p in parameters_to_add.values()]) - -# add parameters to header -for param, (_, values) in parameters_to_add.items(): - new_target["tune_params_keys"].append(param) - 
new_target["tune_params"][param] = values - -# add parameters to cache -# caution: does not take restrictions into account -extra_configurations = list(product(*[p[1] for p in parameters_to_add.values()])) -for config_string, base_config in target["cache"].items(): - # lookup the base config in the other cachefile using the defaults - source_base_config: dict = extra_source[f"{config_string},{default_config_string}"] - - # delete the old config from the new data - del new_target["cache"][config_string] - - # for each existing config, add as many new configurations as needed by inferring from source - for extra_config in extra_configurations: - extra_config_string = ",".join([str(p) for p in extra_config]) - new_config_string = f"{config_string},{extra_config_string}" - - # lookup the extra config in the other cachefile to use as a basis - try: - source_extra_config: dict = extra_source[new_config_string] - except KeyError: - # as we assume that the extra source is a superset, this config is most likely skipped due to restrictions - continue - new_target_config = deepcopy(source_extra_config) - - # change the values for target based on the relative difference between target, source base and source extra - def change_relatively(target_base, source_base, source_extra): - # check if there are any error values - if isinstance(target_base, str): - return target_base - elif isinstance(source_extra, str): - return source_extra - elif isinstance(source_base, str): - return source_base - # make sure all are the same type - assert type(target_base) == type(source_base) == type(source_extra) - if isinstance(target_base, (list, tuple)): - # if we're dealing with lists, go recursive - assert len(target_base) == len(source_base) == len(source_extra) - return [ - change_relatively(target_base[i], source_base[i], source_extra[i]) for i in range(len(target_base)) - ] - # final check for the type - if not isinstance(target_base, numbers.Real): - raise ValueError( - f"Relative value 
change is not possible for non-numeric values of type {type(target_base)} ({target_base})" - ) - # since we're dealing with numbers, we can do the relative value change - try: - fraction = source_extra / source_base - return target_base * fraction - except ZeroDivisionError: - return target_base - - # apply the relative value change - for key in [ - "time", - "times", - "compile_time", - "verification_time", - "benchmark_time", - "strategy_time", - "framework_time", - "GFLOP/s", - ]: - new_target_config[key] = change_relatively( - base_config[key], source_base_config[key], source_extra_config[key] - ) - - # add the new config to the new target data - new_target["cache"][new_config_string] = new_target_config - -# check that the extension is succesful -assert len(new_target["cache"]) == len( - extra_source -), f"Lengths don't match; target: {len(new_target['cache'])}, source: {len(extra_source)}" - -# write to the target file -with target_outfile.open("w+") as fp: - json.dump(new_target, fp) diff --git a/cached_data_used/cachefiles/kerneltuner_to_T4.py b/cached_data_used/cachefiles/kerneltuner_to_T4.py deleted file mode 100644 index 9865c6c..0000000 --- a/cached_data_used/cachefiles/kerneltuner_to_T4.py +++ /dev/null @@ -1,16 +0,0 @@ -from pathlib import Path - -from kernel_tuner.cache.cli_tools import convert_t4 - -basepath = Path(__file__).parent -directories = ["convolution_milo", "dedisp_milo", "gemm_milo", "hotspot_milo"] - -for directory in directories: - print(f"Converting files in {directory}") - dirpath = Path(basepath / directory) - assert dirpath.is_dir(), f"Not a directory: {dirpath}" - for infile in dirpath.iterdir(): - if infile.suffix.endswith("json") and not infile.stem.endswith("_T4"): - print(f" | converting {infile.stem}") - outfile = infile.with_stem(infile.stem + "_T4") - convert_t4(infile, outfile) diff --git a/cached_data_used/cachefiles/ktt_values_to_kerneltuner.py b/cached_data_used/cachefiles/ktt_values_to_kerneltuner.py deleted file mode 
100644 index 0a167db..0000000 --- a/cached_data_used/cachefiles/ktt_values_to_kerneltuner.py +++ /dev/null @@ -1,116 +0,0 @@ -"""Script to overwrite Kernel Tuner brute forced cache files with the objective values of a KTT brute force search. - -Notes: this requires a fully bruteforced KTT and fully bruteforced KernelTuner (KT) cachefile on the same search space. -Objective value is assumed to be time by default. Time is assumed to be in microseconds for KTT and milliseconds for KT. -""" - -import json -from pathlib import Path - -from autotuning_methodology.runner import ktt_param_mapping - -kerneltuner_cachefiles_path = Path(__file__).parent.resolve() -assert kerneltuner_cachefiles_path.exists() -ktt_data_path = kerneltuner_cachefiles_path / "KTT data" -assert ktt_data_path.exists() - -files_to_import = [f for f in ktt_data_path.iterdir() if f.is_file() and f.suffix == ".json"] -ktt_objective_name = "Duration" -kt_objective_name = "time" - -error_status_mapping = { - "ok": None, - "devicelimitsexceeded": '"CompilationFailedConfig"', - "computationfailed": '"RuntimeFailedConfig"', -} - -for file in files_to_import: - # find the associated KernelTuner cachefile to write to - ktt_data = dict(json.loads(file.read_bytes())) - metadata = ktt_data["Metadata"] - device = str(metadata["Device"]) - device_filename = device.replace("NVIDIA GeForce ", "").replace(" ", "_") - kernel = str(ktt_data["Results"][0]["KernelName"]) - kernel_filename = kernel.lower() - kerneltuner_cachefile = kerneltuner_cachefiles_path / kernel_filename / f"{device_filename}.json" - assert kerneltuner_cachefile.exists() - ktt_param_mapping_kernel = ktt_param_mapping[kernel_filename] - print(f"Importing objective values from KTT to KernelTuner file for '{kernel}' on {device}") - - # for each configuration in the KTT file, use the value in the KernelTuner file - config_to_change = dict() - kerneltuner_data = dict(json.loads(kerneltuner_cachefile.read_bytes())) - ktt_results = ktt_data["Results"] - 
cache = kerneltuner_data["cache"] - assert len(cache) == len(ktt_results) - for ktt_config in ktt_results: - # convert the configuration to T4 style dictionary for fast lookups in the mapping - configuration_ktt = dict() - for param in ktt_config["Configuration"]: - configuration_ktt[param["Name"]] = param["Value"] - - # convert the configuration data with the mapping in the correct order - configuration = dict() - param_map = ktt_param_mapping_kernel - assert len(param_map) == len( - configuration_ktt - ), f"Mapping provided for {len(param_map)} params, but configuration has {len(configuration_ktt)}" - for param_name, mapping in param_map.items(): - param_value = configuration_ktt[param_name] - # if the mapping is None, do not include the parameter - if mapping is None: - pass - # if the mapping is a tuple, the first argument is the new parameter name and the second the value - elif isinstance(mapping, tuple): - param_mapped_name, param_mapped_value = mapping - if callable(param_mapped_value): - param_mapped_value = param_mapped_value(param_value) - configuration[param_mapped_name] = param_mapped_value - # if it's a list of tuples, map to multiple parameters - elif isinstance(mapping, list): - for param_mapped_name, param_mapped_value in mapping: - if callable(param_mapped_value): - param_mapped_value = param_mapped_value(param_value) - configuration[param_mapped_name] = param_mapped_value - else: - raise ValueError(f"Can not apply parameter mapping of {type(mapping)} ({mapping})") - - # get and validate the Kernel Tuner configuration - lookup_string = ",".join(str(v) for v in configuration.values()) # the key to lookup the configuration - assert lookup_string in cache - kt_config = cache[lookup_string] - for param, value in configuration.items(): - assert kt_config[param] == value - - # replace the objective in the KT configuration with the objective in the KTT configuration - kt_old_objective_value = kt_config[kt_objective_name] - kt_new_objective_value = "" - 
status = error_status_mapping[str(ktt_config["Status"]).lower()] - if status is None: - kt_new_objective_value = ktt_config["ComputationResults"][0][ktt_objective_name] / 1000 - else: - kt_new_objective_value = status - kerneltuner_data["cache"][lookup_string][kt_objective_name] = kt_new_objective_value - config_to_change[lookup_string] = (kt_old_objective_value, kt_new_objective_value) - # print(f"Replacing {kt_old_objective_value} with {kt_new_objective_value}") - - # load the individual lines of the file - with kerneltuner_cachefile.open(mode="r", encoding="utf-8") as fp: - lines = fp.readlines() - cache_start = False - # write the new data to file - with kerneltuner_cachefile.open(mode="w") as fp: - # for each line in the cache part of the file, lookup the config string in the changes dictionary and replace - for line in lines: - if '"cache":' in line: - cache_start = True - fp.write(line) - elif not cache_start or line[:1] == "}" or len(line) < 3: - fp.write(line) - else: - lookup_string = line.split(":")[0].replace('"', "").strip() - old_value, new_value = config_to_change[lookup_string] - line = line.replace(f'"time": {old_value},', f'"time": {new_value},', 1) - fp.write(line) - - # kerneltuner_cachefile.write_text(json.dumps(kerneltuner_data, indent=3)) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 75e7895..a0185ca 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -6,22 +6,22 @@ "applications": [ { "name": "dedispersion_milo", - "folder": "../autotuning_methodology/cached_data_used/kernels", + "folder": "../autotuning_methodology/benchmark_hub/kernels", "input_file": "dedispersion_milo.json" }, { "name": "convolution_milo", - "folder": "../autotuning_methodology/cached_data_used/kernels", + "folder": "../autotuning_methodology/benchmark_hub/kernels", "input_file": "convolution_milo.json" }, { "name": "hotspot_milo", - 
"folder": "../autotuning_methodology/cached_data_used/kernels", + "folder": "../autotuning_methodology/benchmark_hub/kernels", "input_file": "hotspot_milo.json" }, { "name": "gemm_milo", - "folder": "../autotuning_methodology/cached_data_used/kernels", + "folder": "../autotuning_methodology/benchmark_hub/kernels", "input_file": "gemm_milo.json" } ], @@ -31,7 +31,7 @@ "MI250X" ], "pattern_for_full_search_space_filenames": { - "regex": "./cached_data_used/cachefiles/${applications}/${gpus}_T4.json" + "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" }, "stochastic": true, "repeats": 50, From f64f8a2a1314a70e956130b96755194d66af16bc Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 7 Mar 2025 13:01:56 +0100 Subject: [PATCH 079/234] Updated benchmark_hunb --- benchmark_hub | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark_hub b/benchmark_hub index fc5dec0..40d4a5e 160000 --- a/benchmark_hub +++ b/benchmark_hub @@ -1 +1 @@ -Subproject commit fc5dec06ecf81dd1222606321f88aca9f7d47918 +Subproject commit 40d4a5ec7615dea11ba3f4884f006c6e7837aed5 From b5e0f7edeb3286a8a36d86fb8db43d6b346f9dbc Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 7 Mar 2025 13:06:34 +0100 Subject: [PATCH 080/234] Fixed an error with quotes in an f-string --- src/autotuning_methodology/searchspace_statistics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index 6fee150..08e9e3a 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -81,7 +81,7 @@ def to_valid_array( if replace_missing_measurement_from_times_key is not None: val = np.mean(r["times"][replace_missing_measurement_from_times_key]) else: - raise ValueError(f"Measurement with name {key} not found in {r["measurements"]}") + raise ValueError(f"Measurement with name {key} 
not found in {r['measurements']}") if len(measurements) == 1: m = measurements[0] if key == m["name"]: From 31978dfb815e53ac4d2b92d4ff7209e96da6568f Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 7 Mar 2025 14:57:01 +0100 Subject: [PATCH 081/234] Changed default kernel location --- tests/autotuning_methodology/integration/test_run_experiment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/autotuning_methodology/integration/test_run_experiment.py b/tests/autotuning_methodology/integration/test_run_experiment.py index 7186ab1..3d8959b 100644 --- a/tests/autotuning_methodology/integration/test_run_experiment.py +++ b/tests/autotuning_methodology/integration/test_run_experiment.py @@ -30,7 +30,7 @@ cached_visualization_path = experiment_path_run plot_path = cached_visualization_path / "generated_graphs" cached_visualization_file = experiment_path_run / strategy / "mock_GPU_mocktest_kernel_convolution.npz" -normal_cachefiles_path = package_path / Path(f"cached_data_used/cachefiles/{kernel_id}") +normal_cachefiles_path = package_path / Path(f"benchmark_hub/cachefiles/{kernel_id}") normal_cachefile_destination = normal_cachefiles_path / "mock_gpu.json" experiment_import_filepath_test = mockfiles_path / "test_import_runs.json" assert experiment_import_filepath_test.exists() From 11c5bf417fe48f1cb3c50cf4c04ac0465313c77c Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 7 Mar 2025 15:16:29 +0100 Subject: [PATCH 082/234] Changed default experiments location --- src/autotuning_methodology/experiments.py | 4 +++- src/autotuning_methodology/experiments_defaults.json | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 002f4df..b2b6c6d 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -1,6 +1,8 @@ """Main experiments code.""" -from __future__ import 
annotations # for correct nested type hints e.g. list[str], tuple[dict, str] +from __future__ import ( + annotations, # for correct nested type hints e.g. list[str], tuple[dict, str] +) import json from argparse import ArgumentParser diff --git a/src/autotuning_methodology/experiments_defaults.json b/src/autotuning_methodology/experiments_defaults.json index bd99e75..b046365 100644 --- a/src/autotuning_methodology/experiments_defaults.json +++ b/src/autotuning_methodology/experiments_defaults.json @@ -6,12 +6,12 @@ "applications": [ { "name": "convolution", - "folder": "../autotuning_methodology/cached_data_used/kernels", + "folder": "../autotuning_methodology/benchmark_hub/kernels", "input_file": "convolution.json" }, { "name": "pnpoly", - "folder": "../autotuning_methodology/cached_data_used/kernels", + "folder": "../autotuning_methodology/benchmark_hub/kernels", "input_file": "pnpoly.json" } ], @@ -20,7 +20,7 @@ "RTX_2080_Ti" ], "pattern_for_full_search_space_filenames": { - "regex": "../autotuning_methodology/cached_data_used/cachefiles/${applications}/${gpus}_T4.json" + "regex": "../autotuning_methodology/benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" }, "stochastic": true, "repeats": 25, From 27efe35e990b25bd52eb57216e8623025bd93a88 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 7 Mar 2025 21:33:45 +0100 Subject: [PATCH 083/234] Adjusted for new paper --- src/autotuning_methodology/experiments_defaults.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/autotuning_methodology/experiments_defaults.json b/src/autotuning_methodology/experiments_defaults.json index b046365..cacab13 100644 --- a/src/autotuning_methodology/experiments_defaults.json +++ b/src/autotuning_methodology/experiments_defaults.json @@ -20,7 +20,7 @@ "RTX_2080_Ti" ], "pattern_for_full_search_space_filenames": { - "regex": "../autotuning_methodology/benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" + "regex": 
"../../autotuning_methodology/benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" }, "stochastic": true, "repeats": 25, From 5960b93b1824b34164321dc297a92bbbb1839a7e Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sat, 8 Mar 2025 23:37:23 +0100 Subject: [PATCH 084/234] Updated experiments file --- .../compare_hypertuners_paper.json | 117 ------------------ 1 file changed, 117 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index a0185ca..c052e27 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -68,60 +68,6 @@ } ] }, - { - "name": "pso_tuned", - "search_method": "pso", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 30 - }, - { - "name": "maxiter", - "value": 50 - }, - { - "name": "w", - "value": 0.25 - }, - { - "name": "c1", - "value": 2.0 - }, - { - "name": "c2", - "value": 1.5 - } - ], - "display_name": "PSO tuned", - "autotuner": "KernelTuner", - "color_parent": "pso_default" - }, - { - "name": "pso_tuned_inverse", - "search_method": "pso", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 30 - }, - { - "name": "maxiter", - "value": 100 - }, - { - "name": "w", - "value": 0.75 - }, - { - "name": "c1", - "value": 3.0 - } - ], - "display_name": "PSO tuned inv.", - "autotuner": "KernelTuner", - "color_parent": "pso_default" - }, { "name": "greedy_ils_default", "search_method": "greedy_ils", @@ -146,56 +92,6 @@ "display_name": "Greedy ILS default", "autotuner": "KernelTuner" }, - { - "name": "greedy_ils_tuned", - "search_method": "greedy_ils", - "search_method_hyperparameters": [ - { - "name": "neighbor", - "value": "Hamming" - }, - { - "name": "restart", - "value": true - }, - { - "name": "no_improvement", - "value": 66 - }, - { - "name": "random_walk", - "value": 0.9 - } - ], - "display_name": "Greedy ILS tuned", - "autotuner": "KernelTuner", - "color_parent": 
"greedy_ils_default" - }, - { - "name": "greedy_ils_tuned_inverse", - "search_method": "greedy_ils", - "search_method_hyperparameters": [ - { - "name": "neighbor", - "value": "adjacent" - }, - { - "name": "restart", - "value": false - }, - { - "name": "no_improvement", - "value": 66 - }, - { - "name": "random_walk", - "value": 0.9 - } - ], - "display_name": "Greedy ILS tuned inv.", - "autotuner": "KernelTuner", - "color_parent": "greedy_ils_default" - }, { "name": "genetic_algorithm", "search_method": "genetic_algorithm", @@ -207,19 +103,6 @@ "search_method": "dual_annealing", "display_name": "Dual Annealing", "autotuner": "KernelTuner" - }, - { - "name": "dual_annealing_tuned", - "search_method": "dual_annealing", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "CG" - } - ], - "display_name": "Dual Annealing tuned", - "autotuner": "KernelTuner", - "color_parent": "dual_annealing" } ], "statistics_settings": { From 6079bce6b16bfc14d62f3f450e1e1e89a1d4b4fd Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sat, 8 Mar 2025 23:38:45 +0100 Subject: [PATCH 085/234] Generate a unique filename for generated experiment files to avoid collisions in parallel runs --- src/autotuning_methodology/experiments.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index b2b6c6d..84947b0 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -9,6 +9,7 @@ from math import ceil from os import getcwd, makedirs from pathlib import Path +from random import randint from jsonschema import ValidationError @@ -369,6 +370,16 @@ def generate_input_file(group: dict): json.dump(input_json, fp, indent=4) +def get_random_unique_filename(prefix = '', suffix=''): + """Get a random, unique filename that does not yet exist.""" + def randpath(): + return Path(f"{prefix}{randint(1000, 9999)}{suffix}") + path = 
randpath() + while path.exists(): + path = randpath() + return path + + def generate_experiment_file( name: str, parent_folder: Path, @@ -376,13 +387,17 @@ def generate_experiment_file( applications: list[dict] = None, gpus: list[str] = None, override: dict = None, + generate_unique_file=False, overwrite_existing_file=False, ): """Creates an experiment file based on the given inputs and opinionated defaults.""" assert isinstance(name, str) and len(name) > 0, f"Name for experiment file must be valid, is '{name}'" experiment_file_path = Path(f"./{name.replace(' ', '_')}.json") - if experiment_file_path.exists() and overwrite_existing_file is False: - raise FileExistsError(f"Experiments file '{experiment_file_path}' already exists") + if generate_unique_file is True: + experiment_file_path = get_random_unique_filename(f"{name.replace(' ', '_')}_", '.json') + if experiment_file_path.exists(): + if overwrite_existing_file is False: + raise FileExistsError(f"Experiments file '{experiment_file_path}' already exists") defaults_path = Path(__file__).parent / "experiments_defaults.json" with defaults_path.open() as fp: experiment: dict = json.load(fp) From dd02fc8e054ce3be8ab2164569bccc6a870f2202 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sun, 9 Mar 2025 00:02:49 +0100 Subject: [PATCH 086/234] Added tuned and inverse tuned comparisons of basinhopping, dual_annealing, and mls --- .../compare_hypertuners_paper.json | 203 ++++++++++++++++-- 1 file changed, 182 insertions(+), 21 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index c052e27..6df699e 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -26,7 +26,6 @@ } ], "gpus": [ - "A100", "A4000", "MI250X" ], @@ -41,32 +40,116 @@ }, "search_strategies": [ { - "name": "pso_default", - "search_method": "pso", - "display_name": "PSO default", + "name": "basinhopping_default", + 
"search_method": "basinhopping", + "display_name": "Basinhopping default", + "autotuner": "KernelTuner", + "search_method_hyperparameters": [ + { + "name": "method", + "value": "L-BFGS-B" + }, + { + "name": "T", + "value": 1.0 + } + ] + }, + { + "name": "basinhopping_tuned", + "search_method": "basinhopping", + "display_name": "Basinhopping tuned", "autotuner": "KernelTuner", + "search_method_hyperparameters": [ + { + "name": "method", + "value": "Nelder-Mead" + }, + { + "name": "T", + "value": 1.0 + } + ], + "color_parent": "basinhopping_default" + }, + { + "name": "basinhopping_tuned_inv", + "search_method": "basinhopping", + "display_name": "Basinhopping inv. tuned", + "autotuner": "KernelTuner", + "search_method_hyperparameters": [ + { + "name": "method", + "value": "L-BFGS-B" + }, + { + "name": "T", + "value": 1.5 + } + ], + "color_parent": "basinhopping_default" + }, + { + "name": "dual_annealing_default", + "search_method": "dual_annealing", + "search_method_hyperparameters": [ + { + "name": "method", + "value": "Powell" + } + ], + "display_name": "Dual Annealing default", + "autotuner": "KernelTuner" + }, + { + "name": "dual_annealing_tuned", + "search_method": "dual_annealing", + "search_method_hyperparameters": [ + { + "name": "method", + "value": "Powell" + } + ], + "display_name": "Dual Annealing tuned", + "autotuner": "KernelTuner", + "color_parent": "dual_annealing_default" + }, + { + "name": "dual_annealing_inv_tuned", + "search_method": "dual_annealing", + "search_method_hyperparameters": [ + { + "name": "method", + "value": "Nelder-Mead" + } + ], + "display_name": "Dual Annealing inv. 
tuned", + "autotuner": "KernelTuner", + "color_parent": "dual_annealing_default" + }, + { + "name": "genetic_algorithm_default", + "search_method": "genetic_algorithm", "search_method_hyperparameters": [ { "name": "popsize", - "value": 20 + "value": "20" }, { "name": "maxiter", "value": 100 }, { - "name": "w", - "value": 0.5 + "name": "method", + "value": "uniform" }, { - "name": "c1", - "value": 2.0 - }, - { - "name": "c2", - "value": 1.0 + "name": "mutation_chance", + "value": 10 } - ] + ], + "display_name": "Genetic Algorithm", + "autotuner": "KernelTuner" }, { "name": "greedy_ils_default", @@ -93,16 +176,94 @@ "autotuner": "KernelTuner" }, { - "name": "genetic_algorithm", - "search_method": "genetic_algorithm", - "display_name": "Genetic Algorithm", + "name": "mls_default", + "search_method": "mls", + "search_method_hyperparameters": [ + { + "name": "neighbor", + "value": "Hamming" + }, + { + "name": "restart", + "value": "False" + }, + { + "name": "randomize", + "value": "True" + } + ], + "display_name": "MLS default", "autotuner": "KernelTuner" }, { - "name": "dual_annealing", - "search_method": "dual_annealing", - "display_name": "Dual Annealing", - "autotuner": "KernelTuner" + "name": "mls_tuned", + "search_method": "mls", + "search_method_hyperparameters": [ + { + "name": "neighbor", + "value": "adjacent" + }, + { + "name": "restart", + "value": "False" + }, + { + "name": "randomize", + "value": "True" + } + ], + "display_name": "MLS tuned", + "autotuner": "KernelTuner", + "color_parent": "mls_default" + }, + { + "name": "mls_inv_tuned", + "search_method": "mls", + "search_method_hyperparameters": [ + { + "name": "neighbor", + "value": "Hamming" + }, + { + "name": "restart", + "value": "True" + }, + { + "name": "randomize", + "value": "False" + } + ], + "display_name": "MLS inv. 
tuned", + "autotuner": "KernelTuner", + "color_parent": "mls_default" + }, + { + "name": "pso_default", + "search_method": "pso", + "display_name": "PSO default", + "autotuner": "KernelTuner", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 20 + }, + { + "name": "maxiter", + "value": 100 + }, + { + "name": "w", + "value": 0.5 + }, + { + "name": "c1", + "value": 2.0 + }, + { + "name": "c2", + "value": 1.0 + } + ] } ], "statistics_settings": { From 55399b50860f2a2b01b2199df077cebeef8b23db Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sun, 9 Mar 2025 13:38:00 +0100 Subject: [PATCH 087/234] Added tuning and inv. tuning of differential evolution and genetic algorithm --- .../compare_hypertuners_paper.json | 118 +++++++++++++++++- 1 file changed, 116 insertions(+), 2 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 6df699e..f82f89d 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -26,8 +26,10 @@ } ], "gpus": [ + "A100", "A4000", - "MI250X" + "MI250X", + "W6600" ], "pattern_for_full_search_space_filenames": { "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" @@ -89,6 +91,68 @@ ], "color_parent": "basinhopping_default" }, + { + "name": "diff_evo_default", + "search_method": "diff_evo", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": "20" + }, + { + "name": "maxiter", + "value": 100 + }, + { + "name": "method", + "value": "best1bin" + } + ], + "display_name": "Differential Evolution default", + "autotuner": "KernelTuner" + }, + { + "name": "diff_evo_tuned", + "search_method": "diff_evo", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": "20" + }, + { + "name": "maxiter", + "value": 100 + }, + { + "name": "method", + "value": "best1bin" + } + ], + "display_name": "Differential Evolution tuned", + "autotuner": "KernelTuner", + 
"color_parent": "diff_evo_default" + }, + { + "name": "diff_evo_inv_tuned", + "search_method": "diff_evo", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": "20" + }, + { + "name": "maxiter", + "value": 100 + }, + { + "name": "method", + "value": "best1bin" + } + ], + "display_name": "Differential Evolution inv. tuned", + "autotuner": "KernelTuner", + "color_parent": "diff_evo_default" + }, { "name": "dual_annealing_default", "search_method": "dual_annealing", @@ -148,9 +212,59 @@ "value": 10 } ], - "display_name": "Genetic Algorithm", + "display_name": "Genetic Algorithm default", "autotuner": "KernelTuner" }, + { + "name": "genetic_algorithm_tuned", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": "20" + }, + { + "name": "maxiter", + "value": 50 + }, + { + "name": "method", + "value": "disruptive_uniform" + }, + { + "name": "mutation_chance", + "value": 5 + } + ], + "display_name": "Genetic Algorithm tuned", + "autotuner": "KernelTuner", + "color_parent": "genetic_algorithm_default" + }, + { + "name": "genetic_algorithm_inv_tuned", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": "20" + }, + { + "name": "maxiter", + "value": 50 + }, + { + "name": "method", + "value": "two_point" + }, + { + "name": "mutation_chance", + "value": 5 + } + ], + "display_name": "Genetic Algorithm inv. tuned", + "autotuner": "KernelTuner", + "color_parent": "genetic_algorithm_default" + }, { "name": "greedy_ils_default", "search_method": "greedy_ils", From 384cb21bafaf017df0d4f43c15ec1b0b1b6e1d23 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 10 Mar 2025 01:19:43 +0100 Subject: [PATCH 088/234] Added tuning and inv. 
tuning of PSO --- .../compare_hypertuners_paper.json | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index f82f89d..7ed12fc 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -378,6 +378,64 @@ "value": 1.0 } ] + }, + { + "name": "pso_tuned", + "search_method": "pso", + "display_name": "PSO tuned", + "autotuner": "KernelTuner", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 20 + }, + { + "name": "maxiter", + "value": 150 + }, + { + "name": "w", + "value": 0.25 + }, + { + "name": "c1", + "value": 2.0 + }, + { + "name": "c2", + "value": 0.5 + } + ], + "color_parent": "pso_default" + }, + { + "name": "pso_inv_tuned", + "search_method": "pso", + "display_name": "PSO inv. tuned", + "autotuner": "KernelTuner", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 10 + }, + { + "name": "maxiter", + "value": 50 + }, + { + "name": "w", + "value": 0.5 + }, + { + "name": "c1", + "value": 2.0 + }, + { + "name": "c2", + "value": 0.5 + } + ], + "color_parent": "pso_default" } ], "statistics_settings": { From 85714165348ef8d512e083b16fa325f6467ba4e1 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 10 Mar 2025 01:22:01 +0100 Subject: [PATCH 089/234] Removed basinhopping and dual annealing --- .../compare_hypertuners_paper.json | 75 ------------------- 1 file changed, 75 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 7ed12fc..ab4b7d1 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -41,56 +41,6 @@ "ignore_cache": false }, "search_strategies": [ - { - "name": "basinhopping_default", - "search_method": "basinhopping", - "display_name": "Basinhopping default", - "autotuner": "KernelTuner", - 
"search_method_hyperparameters": [ - { - "name": "method", - "value": "L-BFGS-B" - }, - { - "name": "T", - "value": 1.0 - } - ] - }, - { - "name": "basinhopping_tuned", - "search_method": "basinhopping", - "display_name": "Basinhopping tuned", - "autotuner": "KernelTuner", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "Nelder-Mead" - }, - { - "name": "T", - "value": 1.0 - } - ], - "color_parent": "basinhopping_default" - }, - { - "name": "basinhopping_tuned_inv", - "search_method": "basinhopping", - "display_name": "Basinhopping inv. tuned", - "autotuner": "KernelTuner", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "L-BFGS-B" - }, - { - "name": "T", - "value": 1.5 - } - ], - "color_parent": "basinhopping_default" - }, { "name": "diff_evo_default", "search_method": "diff_evo", @@ -153,31 +103,6 @@ "autotuner": "KernelTuner", "color_parent": "diff_evo_default" }, - { - "name": "dual_annealing_default", - "search_method": "dual_annealing", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "Powell" - } - ], - "display_name": "Dual Annealing default", - "autotuner": "KernelTuner" - }, - { - "name": "dual_annealing_tuned", - "search_method": "dual_annealing", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "Powell" - } - ], - "display_name": "Dual Annealing tuned", - "autotuner": "KernelTuner", - "color_parent": "dual_annealing_default" - }, { "name": "dual_annealing_inv_tuned", "search_method": "dual_annealing", From c2f1fd7fb76a06f10a7dffac6551e0348ddc3d40 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 10 Mar 2025 01:22:12 +0100 Subject: [PATCH 090/234] Removed basinhopping and dual annealing --- experiment_files/compare_hypertuners_paper.json | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index ab4b7d1..a513d1b 100644 --- 
a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -103,19 +103,6 @@ "autotuner": "KernelTuner", "color_parent": "diff_evo_default" }, - { - "name": "dual_annealing_inv_tuned", - "search_method": "dual_annealing", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "Nelder-Mead" - } - ], - "display_name": "Dual Annealing inv. tuned", - "autotuner": "KernelTuner", - "color_parent": "dual_annealing_default" - }, { "name": "genetic_algorithm_default", "search_method": "genetic_algorithm", From 97b73e2d62648a3391399caf3c05edf41d37a821 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 10 Mar 2025 08:34:53 +0100 Subject: [PATCH 091/234] With too few data points, return directly instead off smoothing --- src/autotuning_methodology/baseline.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/autotuning_methodology/baseline.py b/src/autotuning_methodology/baseline.py index ef3e571..232ca6f 100644 --- a/src/autotuning_methodology/baseline.py +++ b/src/autotuning_methodology/baseline.py @@ -138,6 +138,10 @@ def _get_random_curve(self, fevals_range: np.ndarray, smoothing=True) -> np.ndar x = fevals_range y = draws + # if there are too few data points left to interpolate on, return draws + if len(x) < 2 or len(y) < 2: + return draws + # apply the monotonicity-preserving Piecewise Cubic Hermite Interpolating Polynomial smooth_fevals_range = np.linspace(fevals_range[0], fevals_range[-1], len(fevals_range)) smooth_draws = PchipInterpolator(x, y)(smooth_fevals_range) From 7093271a2783de8bd5255d223177302d11deb6e0 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 10 Mar 2025 17:31:37 +0100 Subject: [PATCH 092/234] Adjusted the cutoff percentile for hyperparameter tunoing --- experiment_files/compare_hypertuners_paper.json | 2 +- src/autotuning_methodology/experiments_defaults.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index a513d1b..563377e 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -352,7 +352,7 @@ ], "statistics_settings": { "minimization": true, - "cutoff_percentile": 0.96, + "cutoff_percentile": 0.95, "cutoff_percentile_start": 0.5, "cutoff_type": "fevals", "objective_time_keys": [ diff --git a/src/autotuning_methodology/experiments_defaults.json b/src/autotuning_methodology/experiments_defaults.json index cacab13..c4f0ac2 100644 --- a/src/autotuning_methodology/experiments_defaults.json +++ b/src/autotuning_methodology/experiments_defaults.json @@ -38,7 +38,7 @@ ], "statistics_settings": { "minimization": true, - "cutoff_percentile": 0.96, + "cutoff_percentile": 0.95, "cutoff_percentile_start": 0.5, "cutoff_type": "fevals", "objective_time_keys": [ From 6c21d42e5e2083a03074ea4154591a4e0e7e8b78 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 10 Mar 2025 23:30:37 +0100 Subject: [PATCH 093/234] Started over with hyperparamtuning, added dual_annealing and MLS tunings --- .../compare_hypertuners_paper.json | 172 ++++-------------- 1 file changed, 38 insertions(+), 134 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 563377e..9829baf 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -42,27 +42,23 @@ }, "search_strategies": [ { - "name": "diff_evo_default", - "search_method": "diff_evo", + "name": "basinhopping_default", + "search_method": "basinhopping", + "display_name": "Basinhopping default", + "autotuner": "KernelTuner", "search_method_hyperparameters": [ { - "name": "popsize", - "value": "20" - }, - { - "name": "maxiter", - "value": 100 + "name": "method", + "value": "L-BFGS-B" }, { - "name": "method", - "value": "best1bin" + "name": "T", + "value": 
1.0 } - ], - "display_name": "Differential Evolution default", - "autotuner": "KernelTuner" + ] }, { - "name": "diff_evo_tuned", + "name": "diff_evo_default", "search_method": "diff_evo", "search_method_hyperparameters": [ { @@ -78,82 +74,49 @@ "value": "best1bin" } ], - "display_name": "Differential Evolution tuned", - "autotuner": "KernelTuner", - "color_parent": "diff_evo_default" + "display_name": "Differential Evolution default", + "autotuner": "KernelTuner" }, { - "name": "diff_evo_inv_tuned", - "search_method": "diff_evo", + "name": "dual_annealing_default", + "search_method": "dual_annealing", "search_method_hyperparameters": [ - { - "name": "popsize", - "value": "20" - }, - { - "name": "maxiter", - "value": 100 - }, { "name": "method", - "value": "best1bin" + "value": "Powell" } ], - "display_name": "Differential Evolution inv. tuned", - "autotuner": "KernelTuner", - "color_parent": "diff_evo_default" + "display_name": "Dual Annealing default", + "autotuner": "KernelTuner" }, { - "name": "genetic_algorithm_default", - "search_method": "genetic_algorithm", + "name": "dual_annealing_tuned", + "search_method": "dual_annealing", "search_method_hyperparameters": [ - { - "name": "popsize", - "value": "20" - }, - { - "name": "maxiter", - "value": 100 - }, { "name": "method", - "value": "uniform" - }, - { - "name": "mutation_chance", - "value": 10 + "value": "Nelder-Mead" } ], - "display_name": "Genetic Algorithm default", - "autotuner": "KernelTuner" + "display_name": "Dual Annealing tuned", + "autotuner": "KernelTuner", + "color_parent": "dual_annealing_default" }, { - "name": "genetic_algorithm_tuned", - "search_method": "genetic_algorithm", + "name": "dual_annealing_inv_tuned", + "search_method": "dual_annealing", "search_method_hyperparameters": [ - { - "name": "popsize", - "value": "20" - }, - { - "name": "maxiter", - "value": 50 - }, { "name": "method", - "value": "disruptive_uniform" - }, - { - "name": "mutation_chance", - "value": 5 + "value": 
"trust-constr" } ], - "display_name": "Genetic Algorithm tuned", + "display_name": "Dual Annealing inv. tuned", "autotuner": "KernelTuner", - "color_parent": "genetic_algorithm_default" + "color_parent": "dual_annealing_default" }, { - "name": "genetic_algorithm_inv_tuned", + "name": "genetic_algorithm_default", "search_method": "genetic_algorithm", "search_method_hyperparameters": [ { @@ -162,20 +125,19 @@ }, { "name": "maxiter", - "value": 50 + "value": 100 }, { "name": "method", - "value": "two_point" + "value": "uniform" }, { "name": "mutation_chance", - "value": 5 + "value": 10 } ], - "display_name": "Genetic Algorithm inv. tuned", - "autotuner": "KernelTuner", - "color_parent": "genetic_algorithm_default" + "display_name": "Genetic Algorithm default", + "autotuner": "KernelTuner" }, { "name": "greedy_ils_default", @@ -227,15 +189,15 @@ "search_method_hyperparameters": [ { "name": "neighbor", - "value": "adjacent" + "value": "Hamming" }, { "name": "restart", - "value": "False" + "value": "True" }, { "name": "randomize", - "value": "True" + "value": "False" } ], "display_name": "MLS tuned", @@ -256,7 +218,7 @@ }, { "name": "randomize", - "value": "False" + "value": "True" } ], "display_name": "MLS inv. tuned", @@ -290,64 +252,6 @@ "value": 1.0 } ] - }, - { - "name": "pso_tuned", - "search_method": "pso", - "display_name": "PSO tuned", - "autotuner": "KernelTuner", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 20 - }, - { - "name": "maxiter", - "value": 150 - }, - { - "name": "w", - "value": 0.25 - }, - { - "name": "c1", - "value": 2.0 - }, - { - "name": "c2", - "value": 0.5 - } - ], - "color_parent": "pso_default" - }, - { - "name": "pso_inv_tuned", - "search_method": "pso", - "display_name": "PSO inv. 
tuned", - "autotuner": "KernelTuner", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 10 - }, - { - "name": "maxiter", - "value": 50 - }, - { - "name": "w", - "value": 0.5 - }, - { - "name": "c1", - "value": 2.0 - }, - { - "name": "c2", - "value": 0.5 - } - ], - "color_parent": "pso_default" } ], "statistics_settings": { From 6ac3ea5e46c7a6cd98426b61c2e3523a4e9c36a3 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 11 Mar 2025 22:47:01 +0100 Subject: [PATCH 094/234] Updated hypertuner comparison --- .../compare_hypertuners_paper.json | 205 ++++++++++++++---- src/autotuning_methodology/runner.py | 1 + 2 files changed, 158 insertions(+), 48 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 9829baf..09c23b5 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -10,14 +10,14 @@ "input_file": "dedispersion_milo.json" }, { - "name": "convolution_milo", + "name": "hotspot_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "convolution_milo.json" + "input_file": "hotspot_milo.json" }, { - "name": "hotspot_milo", + "name": "convolution_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "hotspot_milo.json" + "input_file": "convolution_milo.json" }, { "name": "gemm_milo", @@ -26,10 +26,10 @@ } ], "gpus": [ + "W6600", "A100", "A4000", - "MI250X", - "W6600" + "MI250X" ], "pattern_for_full_search_space_filenames": { "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" @@ -37,7 +37,7 @@ "stochastic": true, "repeats": 50, "samples": 32, - "minimum_number_of_valid_search_iterations": 10, + "minimum_number_of_valid_search_iterations": 8, "ignore_cache": false }, "search_strategies": [ @@ -57,6 +57,40 @@ } ] }, + { + "name": "basinhopping_tuned", + "search_method": "basinhopping", + "display_name": "Basinhopping tuned", + 
"autotuner": "KernelTuner", + "search_method_hyperparameters": [ + { + "name": "method", + "value": "BFGS" + }, + { + "name": "T", + "value": 1.25 + } + ], + "color_parent": "basinhopping_default" + }, + { + "name": "basinhopping_inv_tuned", + "search_method": "basinhopping", + "display_name": "Basinhopping inv. tuned", + "autotuner": "KernelTuner", + "search_method_hyperparameters": [ + { + "name": "method", + "value": "L-BFGS-B" + }, + { + "name": "T", + "value": 0.1 + } + ], + "color_parent": "basinhopping_default" + }, { "name": "diff_evo_default", "search_method": "diff_evo", @@ -77,6 +111,48 @@ "display_name": "Differential Evolution default", "autotuner": "KernelTuner" }, + { + "name": "diff_evo_tuned", + "search_method": "diff_evo", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": "20" + }, + { + "name": "maxiter", + "value": 50 + }, + { + "name": "method", + "value": "best2exp" + } + ], + "display_name": "Differential Evolution tuned", + "autotuner": "KernelTuner", + "color_parent": "diff_evo_default" + }, + { + "name": "diff_evo_inv_tuned", + "search_method": "diff_evo", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": "20" + }, + { + "name": "maxiter", + "value": 150 + }, + { + "name": "method", + "value": "best2exp" + } + ], + "display_name": "Differential Evolution inv. 
tuned", + "autotuner": "KernelTuner", + "color_parent": "diff_evo_default" + }, { "name": "dual_annealing_default", "search_method": "dual_annealing", @@ -164,94 +240,127 @@ "autotuner": "KernelTuner" }, { - "name": "mls_default", - "search_method": "mls", + "name": "greedy_ils_tuned", + "search_method": "greedy_ils", "search_method_hyperparameters": [ { "name": "neighbor", - "value": "Hamming" + "value": "adjacent" }, { "name": "restart", - "value": "False" + "value": false }, { - "name": "randomize", - "value": "True" + "name": "no_improvement", + "value": 10 + }, + { + "name": "random_walk", + "value": 0.4 } ], - "display_name": "MLS default", - "autotuner": "KernelTuner" + "display_name": "Greedy ILS tuned", + "autotuner": "KernelTuner", + "color_parent": "greedy_ils_default" }, { - "name": "mls_tuned", - "search_method": "mls", + "name": "greedy_ils_inv_tuned", + "search_method": "greedy_ils", "search_method_hyperparameters": [ { "name": "neighbor", - "value": "Hamming" + "value": "adjacent" }, { "name": "restart", - "value": "True" + "value": false + }, + { + "name": "no_improvement", + "value": 75 }, { - "name": "randomize", - "value": "False" + "name": "random_walk", + "value": 0.5 } ], - "display_name": "MLS tuned", + "display_name": "Greedy ILS inv. tuned", "autotuner": "KernelTuner", - "color_parent": "mls_default" + "color_parent": "greedy_ils_default" }, { - "name": "mls_inv_tuned", - "search_method": "mls", + "name": "simulated_annealing_default", + "search_method": "simulated_annealing", "search_method_hyperparameters": [ { - "name": "neighbor", - "value": "Hamming" + "name": "T", + "value": 1.0 }, { - "name": "restart", - "value": "True" + "name": "T_min", + "value": 0.001 }, { - "name": "randomize", - "value": "True" + "name": "alpha", + "value": 0.995 + }, + { + "name": "maxiter", + "value": "1" } ], - "display_name": "MLS inv. 
tuned", - "autotuner": "KernelTuner", - "color_parent": "mls_default" + "display_name": "Simulated Annealing default", + "autotuner": "KernelTuner" }, { - "name": "pso_default", - "search_method": "pso", - "display_name": "PSO default", - "autotuner": "KernelTuner", + "name": "simulated_annealing_default", + "search_method": "simulated_annealing", "search_method_hyperparameters": [ { - "name": "popsize", - "value": 20 + "name": "T", + "value": 1.0 + }, + { + "name": "T_min", + "value": 0.001 + }, + { + "name": "alpha", + "value": 0.995 }, { "name": "maxiter", - "value": 100 + "value": 1 + } + ], + "display_name": "Simulated Annealing default", + "autotuner": "KernelTuner" + }, + { + "name": "simulated_annealing_inv_tuned", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "T", + "value": 1.5 }, { - "name": "w", - "value": 0.5 + "name": "T_min", + "value": 0.01 }, { - "name": "c1", - "value": 2.0 + "name": "alpha", + "value": 0.9975 }, { - "name": "c2", - "value": 1.0 + "name": "maxiter", + "value": 2 } - ] + ], + "display_name": "Simulated Annealing inv. 
tuned", + "autotuner": "KernelTuner", + "color_parent": "simulated_annealing_default" } ], "statistics_settings": { diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 86512a1..64f21f0 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -165,6 +165,7 @@ def tune_with_kerneltuner(): def tune_with_BAT(): """Interface to tune with the BAT benchmarking suite.""" # TODO integrate with BAT + raise NotImplementedError("This will be implemented in the future.") def tune_with_KTT(): """Interface with KTT to tune the kernel and return the results.""" From f6879328ac3a590fd1ca12d25941789245fd08cc Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Wed, 12 Mar 2025 03:25:28 +0100 Subject: [PATCH 095/234] Added genetic algorithm and simulated annealing tuned params --- .../compare_hypertuners_paper.json | 67 ++++++++++++++++--- 1 file changed, 59 insertions(+), 8 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 09c23b5..7548717 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -197,7 +197,7 @@ "search_method_hyperparameters": [ { "name": "popsize", - "value": "20" + "value": 20 }, { "name": "maxiter", @@ -215,6 +215,56 @@ "display_name": "Genetic Algorithm default", "autotuner": "KernelTuner" }, + { + "name": "genetic_algorithm_tuned", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 20 + }, + { + "name": "maxiter", + "value": 50 + }, + { + "name": "method", + "value": "two_point" + }, + { + "name": "mutation_chance", + "value": 20 + } + ], + "display_name": "Genetic Algorithm tuned", + "autotuner": "KernelTuner", + "color_parent": "genetic_algorithm_default" + }, + { + "name": "genetic_algorithm_inv_tuned", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + 
{ + "name": "popsize", + "value": 30 + }, + { + "name": "maxiter", + "value": 150 + }, + { + "name": "method", + "value": "single_point" + }, + { + "name": "mutation_chance", + "value": 20 + } + ], + "display_name": "Genetic Algorithm inv. tuned", + "autotuner": "KernelTuner", + "color_parent": "genetic_algorithm_default" + }, { "name": "greedy_ils_default", "search_method": "greedy_ils", @@ -314,28 +364,29 @@ "autotuner": "KernelTuner" }, { - "name": "simulated_annealing_default", + "name": "simulated_annealing_tuned", "search_method": "simulated_annealing", "search_method_hyperparameters": [ { "name": "T", - "value": 1.0 + "value": 1.5 }, { "name": "T_min", - "value": 0.001 + "value": 0.0001 }, { "name": "alpha", - "value": 0.995 + "value": 0.9925 }, { "name": "maxiter", - "value": 1 + "value": 2 } ], - "display_name": "Simulated Annealing default", - "autotuner": "KernelTuner" + "display_name": "Simulated Annealing tuned", + "autotuner": "KernelTuner", + "color_parent": "simulated_annealing_default" }, { "name": "simulated_annealing_inv_tuned", From d49967aaaeebf2f4a369b30d1f7247045bb7c7e0 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Wed, 12 Mar 2025 13:12:34 +0100 Subject: [PATCH 096/234] Added tuned MLS --- .../compare_hypertuners_paper.json | 146 ++++-------------- 1 file changed, 29 insertions(+), 117 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 7548717..e47a634 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -42,154 +42,66 @@ }, "search_strategies": [ { - "name": "basinhopping_default", - "search_method": "basinhopping", - "display_name": "Basinhopping default", - "autotuner": "KernelTuner", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "L-BFGS-B" - }, - { - "name": "T", - "value": 1.0 - } - ] - }, - { - "name": "basinhopping_tuned", - "search_method": "basinhopping", - 
"display_name": "Basinhopping tuned", - "autotuner": "KernelTuner", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "BFGS" - }, - { - "name": "T", - "value": 1.25 - } - ], - "color_parent": "basinhopping_default" - }, - { - "name": "basinhopping_inv_tuned", - "search_method": "basinhopping", - "display_name": "Basinhopping inv. tuned", - "autotuner": "KernelTuner", + "name": "mls_default", + "search_method": "mls", "search_method_hyperparameters": [ { - "name": "method", - "value": "L-BFGS-B" - }, - { - "name": "T", - "value": 0.1 - } - ], - "color_parent": "basinhopping_default" - }, - { - "name": "diff_evo_default", - "search_method": "diff_evo", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": "20" + "name": "neighbor", + "value": "Hamming" }, { - "name": "maxiter", - "value": 100 + "name": "restart", + "value": false }, { - "name": "method", - "value": "best1bin" + "name": "randomize", + "value": true } ], - "display_name": "Differential Evolution default", + "display_name": "MLS default", "autotuner": "KernelTuner" }, { - "name": "diff_evo_tuned", - "search_method": "diff_evo", + "name": "mls_tuned", + "search_method": "mls", "search_method_hyperparameters": [ { - "name": "popsize", - "value": "20" + "name": "neighbor", + "value": "Hamming" }, { - "name": "maxiter", - "value": 50 + "name": "restart", + "value": true }, { - "name": "method", - "value": "best2exp" + "name": "randomize", + "value": false } ], - "display_name": "Differential Evolution tuned", + "display_name": "MLS tuned", "autotuner": "KernelTuner", - "color_parent": "diff_evo_default" + "color_parent": "mls_default" }, { - "name": "diff_evo_inv_tuned", - "search_method": "diff_evo", + "name": "mls_inv_tuned", + "search_method": "mls", "search_method_hyperparameters": [ { - "name": "popsize", - "value": "20" + "name": "neighbor", + "value": "Hamming" }, { - "name": "maxiter", - "value": 150 + "name": "restart", + "value": true }, { - "name": 
"method", - "value": "best2exp" - } - ], - "display_name": "Differential Evolution inv. tuned", - "autotuner": "KernelTuner", - "color_parent": "diff_evo_default" - }, - { - "name": "dual_annealing_default", - "search_method": "dual_annealing", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "Powell" - } - ], - "display_name": "Dual Annealing default", - "autotuner": "KernelTuner" - }, - { - "name": "dual_annealing_tuned", - "search_method": "dual_annealing", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "Nelder-Mead" - } - ], - "display_name": "Dual Annealing tuned", - "autotuner": "KernelTuner", - "color_parent": "dual_annealing_default" - }, - { - "name": "dual_annealing_inv_tuned", - "search_method": "dual_annealing", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "trust-constr" + "name": "randomize", + "value": true } ], - "display_name": "Dual Annealing inv. tuned", + "display_name": "MLS inv. tuned", "autotuner": "KernelTuner", - "color_parent": "dual_annealing_default" + "color_parent": "mls_default" }, { "name": "genetic_algorithm_default", From 77f253b71c10f85386372ac9fb9b0be9da2b1964 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Wed, 12 Mar 2025 13:16:00 +0100 Subject: [PATCH 097/234] Put dual annealing back in comparison --- .../compare_hypertuners_paper.json | 58 ++++++------------- 1 file changed, 17 insertions(+), 41 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index e47a634..f1c0187 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -42,66 +42,42 @@ }, "search_strategies": [ { - "name": "mls_default", - "search_method": "mls", + "name": "dual_annealing_default", + "search_method": "dual_annealing", "search_method_hyperparameters": [ { - "name": "neighbor", - "value": "Hamming" - }, - { - "name": "restart", - "value": false - }, - 
{ - "name": "randomize", - "value": true + "name": "method", + "value": "Powell" } ], - "display_name": "MLS default", + "display_name": "Dual Annealing default", "autotuner": "KernelTuner" }, { - "name": "mls_tuned", - "search_method": "mls", + "name": "dual_annealing_tuned", + "search_method": "dual_annealing", "search_method_hyperparameters": [ { - "name": "neighbor", - "value": "Hamming" - }, - { - "name": "restart", - "value": true - }, - { - "name": "randomize", - "value": false + "name": "method", + "value": "Nelder-Mead" } ], - "display_name": "MLS tuned", + "display_name": "Dual Annealing tuned", "autotuner": "KernelTuner", - "color_parent": "mls_default" + "color_parent": "dual_annealing_default" }, { - "name": "mls_inv_tuned", - "search_method": "mls", + "name": "dual_annealing_inv_tuned", + "search_method": "dual_annealing", "search_method_hyperparameters": [ { - "name": "neighbor", - "value": "Hamming" - }, - { - "name": "restart", - "value": true - }, - { - "name": "randomize", - "value": true + "name": "method", + "value": "trust-constr" } ], - "display_name": "MLS inv. tuned", + "display_name": "Dual Annealing inv. 
tuned", "autotuner": "KernelTuner", - "color_parent": "mls_default" + "color_parent": "dual_annealing_default" }, { "name": "genetic_algorithm_default", From 22446c190e3bb2c623b44a0818cf4c199a34b3a2 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 13 Mar 2025 16:03:18 +0100 Subject: [PATCH 098/234] Improved time margin calculation --- src/autotuning_methodology/curves.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/autotuning_methodology/curves.py b/src/autotuning_methodology/curves.py index f3b318a..0442f95 100644 --- a/src/autotuning_methodology/curves.py +++ b/src/autotuning_methodology/curves.py @@ -714,7 +714,8 @@ def _get_curve_over_time_values_in_range( real_stopping_point_time: float = np.nanmedian(highest_time_per_repeat) # filter to get the time range with a margin on both ends for the isotonic regression - time_range_margin = 0.1 + time_range_margin_modifier = 0.25 * (num_repeats / times.size) # give more margin when there are few values relative to the number of repeats + time_range_margin = 0.1 + time_range_margin_modifier time_range_start = time_range[0] * (1 - time_range_margin) time_range_end = time_range[-1] * (1 + time_range_margin) range_mask_margin = (time_range_start <= times) & (times <= time_range_end) @@ -722,7 +723,8 @@ def _get_curve_over_time_values_in_range( # make sure there is enough overlap in the time ranges if not np.all(np.count_nonzero(range_mask_margin, axis=0) > 1): raise ValueError( - f"Not enough overlap in time range and time values: should be {time_range_start=} <= {times} <= {time_range_end=}", + f"Not enough overlap in time range and time values: should be {time_range_start=} <= {times} <= {time_range_end=} ({time_range_margin=}, {num_repeats=}, {times.size=})", + np.count_nonzero(range_mask_margin, axis=0), self.name, self.application_name, self.device_name, From 0787d7e6c806d8300f065277a8f10075b9c8ef6a Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 13 Mar 2025 16:03:48 +0100 
Subject: [PATCH 099/234] Revised and tested benchmark kernels, added Nvidia A6000 bruteforced --- benchmark_hub | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark_hub b/benchmark_hub index 40d4a5e..14e2787 160000 --- a/benchmark_hub +++ b/benchmark_hub @@ -1 +1 @@ -Subproject commit 40d4a5ec7615dea11ba3f4884f006c6e7837aed5 +Subproject commit 14e278790233c6912d9f487eba5d5ea1a380fefe From d4aaa7617ada5bc5557941ed287055ae319ab5ae Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 14 Mar 2025 00:35:59 +0100 Subject: [PATCH 100/234] Added newly tuned basinhopping --- benchmark_hub | 2 +- .../compare_hypertuners_paper.json | 209 +++++------------- 2 files changed, 56 insertions(+), 155 deletions(-) diff --git a/benchmark_hub b/benchmark_hub index 14e2787..62c549d 160000 --- a/benchmark_hub +++ b/benchmark_hub @@ -1 +1 @@ -Subproject commit 14e278790233c6912d9f487eba5d5ea1a380fefe +Subproject commit 62c549def041efdd19577a83d1ecdc73127c1c32 diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index f1c0187..a1c3844 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -27,6 +27,7 @@ ], "gpus": [ "W6600", + "A6000", "A100", "A4000", "MI250X" @@ -35,12 +36,62 @@ "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" }, "stochastic": true, - "repeats": 50, + "repeats": 100, "samples": 32, "minimum_number_of_valid_search_iterations": 8, "ignore_cache": false }, "search_strategies": [ + { + "name": "basinhopping_default", + "search_method": "basinhopping", + "display_name": "Basinhopping default", + "autotuner": "KernelTuner", + "search_method_hyperparameters": [ + { + "name": "method", + "value": "L-BFGS-B" + }, + { + "name": "T", + "value": 1.0 + } + ] + }, + { + "name": "basinhopping_tuned", + "search_method": "basinhopping", + "display_name": "Basinhopping tuned", + "autotuner": "KernelTuner", + 
"search_method_hyperparameters": [ + { + "name": "method", + "value": "L-BFGS-B" + }, + { + "name": "T", + "value": 0.1 + } + ], + "color_parent": "basinhopping_default" + }, + { + "name": "basinhopping_inv_tuned", + "search_method": "basinhopping", + "display_name": "Basinhopping inv. tuned", + "autotuner": "KernelTuner", + "search_method_hyperparameters": [ + { + "name": "method", + "value": "BFGS" + }, + { + "name": "T", + "value": 1.0 + } + ], + "color_parent": "basinhopping_default" + }, { "name": "dual_annealing_default", "search_method": "dual_annealing", @@ -59,7 +110,7 @@ "search_method_hyperparameters": [ { "name": "method", - "value": "Nelder-Mead" + "value": "trust-constr" } ], "display_name": "Dual Annealing tuned", @@ -72,7 +123,7 @@ "search_method_hyperparameters": [ { "name": "method", - "value": "trust-constr" + "value": "Powell" } ], "display_name": "Dual Annealing inv. tuned", @@ -103,56 +154,6 @@ "display_name": "Genetic Algorithm default", "autotuner": "KernelTuner" }, - { - "name": "genetic_algorithm_tuned", - "search_method": "genetic_algorithm", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 20 - }, - { - "name": "maxiter", - "value": 50 - }, - { - "name": "method", - "value": "two_point" - }, - { - "name": "mutation_chance", - "value": 20 - } - ], - "display_name": "Genetic Algorithm tuned", - "autotuner": "KernelTuner", - "color_parent": "genetic_algorithm_default" - }, - { - "name": "genetic_algorithm_inv_tuned", - "search_method": "genetic_algorithm", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 30 - }, - { - "name": "maxiter", - "value": 150 - }, - { - "name": "method", - "value": "single_point" - }, - { - "name": "mutation_chance", - "value": 20 - } - ], - "display_name": "Genetic Algorithm inv. 
tuned", - "autotuner": "KernelTuner", - "color_parent": "genetic_algorithm_default" - }, { "name": "greedy_ils_default", "search_method": "greedy_ils", @@ -177,56 +178,6 @@ "display_name": "Greedy ILS default", "autotuner": "KernelTuner" }, - { - "name": "greedy_ils_tuned", - "search_method": "greedy_ils", - "search_method_hyperparameters": [ - { - "name": "neighbor", - "value": "adjacent" - }, - { - "name": "restart", - "value": false - }, - { - "name": "no_improvement", - "value": 10 - }, - { - "name": "random_walk", - "value": 0.4 - } - ], - "display_name": "Greedy ILS tuned", - "autotuner": "KernelTuner", - "color_parent": "greedy_ils_default" - }, - { - "name": "greedy_ils_inv_tuned", - "search_method": "greedy_ils", - "search_method_hyperparameters": [ - { - "name": "neighbor", - "value": "adjacent" - }, - { - "name": "restart", - "value": false - }, - { - "name": "no_improvement", - "value": 75 - }, - { - "name": "random_walk", - "value": 0.5 - } - ], - "display_name": "Greedy ILS inv. 
tuned", - "autotuner": "KernelTuner", - "color_parent": "greedy_ils_default" - }, { "name": "simulated_annealing_default", "search_method": "simulated_annealing", @@ -245,61 +196,11 @@ }, { "name": "maxiter", - "value": "1" + "value": 1 } ], "display_name": "Simulated Annealing default", "autotuner": "KernelTuner" - }, - { - "name": "simulated_annealing_tuned", - "search_method": "simulated_annealing", - "search_method_hyperparameters": [ - { - "name": "T", - "value": 1.5 - }, - { - "name": "T_min", - "value": 0.0001 - }, - { - "name": "alpha", - "value": 0.9925 - }, - { - "name": "maxiter", - "value": 2 - } - ], - "display_name": "Simulated Annealing tuned", - "autotuner": "KernelTuner", - "color_parent": "simulated_annealing_default" - }, - { - "name": "simulated_annealing_inv_tuned", - "search_method": "simulated_annealing", - "search_method_hyperparameters": [ - { - "name": "T", - "value": 1.5 - }, - { - "name": "T_min", - "value": 0.01 - }, - { - "name": "alpha", - "value": 0.9975 - }, - { - "name": "maxiter", - "value": 2 - } - ], - "display_name": "Simulated Annealing inv. tuned", - "autotuner": "KernelTuner", - "color_parent": "simulated_annealing_default" } ], "statistics_settings": { From d2b3669ca35044b36caf4153d2ec3a6d1eb1ebea Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 14 Mar 2025 10:23:45 +0100 Subject: [PATCH 101/234] Added tuned and inv. 
tuned for diff_evo, greedy_ils, PSO and simulated annealing --- .../compare_hypertuners_paper.json | 236 ++++++++++++++++++ 1 file changed, 236 insertions(+) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index a1c3844..7c5e1d0 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -92,6 +92,68 @@ ], "color_parent": "basinhopping_default" }, + { + "name": "diff_evo_default", + "search_method": "diff_evo", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 20 + }, + { + "name": "maxiter", + "value": 100 + }, + { + "name": "method", + "value": "best1bin" + } + ], + "display_name": "Differential Evolution default", + "autotuner": "KernelTuner" + }, + { + "name": "diff_evo_tuned", + "search_method": "diff_evo", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 10 + }, + { + "name": "maxiter", + "value": 50 + }, + { + "name": "method", + "value": "best2exp" + } + ], + "display_name": "Differential Evolution tuned", + "autotuner": "KernelTuner", + "color_parent": "diff_evo_default" + }, + { + "name": "diff_evo_inv_tuned", + "search_method": "diff_evo", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 10 + }, + { + "name": "maxiter", + "value": 150 + }, + { + "name": "method", + "value": "randtobest1exp" + } + ], + "display_name": "Differential Evolution inv. 
tuned", + "autotuner": "KernelTuner", + "color_parent": "diff_evo_default" + }, { "name": "dual_annealing_default", "search_method": "dual_annealing", @@ -178,6 +240,130 @@ "display_name": "Greedy ILS default", "autotuner": "KernelTuner" }, + { + "name": "greedy_ils_tuned", + "search_method": "greedy_ils", + "search_method_hyperparameters": [ + { + "name": "neighbor", + "value": "adjacent" + }, + { + "name": "restart", + "value": true + }, + { + "name": "no_improvement", + "value": 25 + }, + { + "name": "random_walk", + "value": 0.1 + } + ], + "display_name": "Greedy ILS tuned", + "autotuner": "KernelTuner", + "color_parent": "greedy_ils_default" + }, + { + "name": "greedy_ils_inv_tuned", + "search_method": "greedy_ils", + "search_method_hyperparameters": [ + { + "name": "neighbor", + "value": "adjacent" + }, + { + "name": "restart", + "value": false + }, + { + "name": "no_improvement", + "value": 75 + }, + { + "name": "random_walk", + "value": 0.4 + } + ], + "display_name": "Greedy ILS inv. 
tuned", + "autotuner": "KernelTuner", + "color_parent": "greedy_ils_default" + }, + { + "name": "pso_default", + "search_method": "pso", + "display_name": "PSO default", + "autotuner": "KernelTuner", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 20 + }, + { + "name": "maxiter", + "value": 100 + }, + { + "name": "c1", + "value": 2.0 + }, + { + "name": "c2", + "value": 1.0 + } + ] + }, + { + "name": "pso_tuned", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 30 + }, + { + "name": "maxiter", + "value": 150 + }, + { + "name": "c1", + "value": 3.0 + }, + { + "name": "c2", + "value": 0.5 + } + ], + "display_name": "PSO tuned", + "autotuner": "KernelTuner", + "color_parent": "pso_default" + }, + { + "name": "pso_inv_tuned", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 10 + }, + { + "name": "maxiter", + "value": 100 + }, + { + "name": "c1", + "value": 1.0 + }, + { + "name": "c2", + "value": 1.0 + } + ], + "display_name": "PSO inv. 
tuned", + "autotuner": "KernelTuner", + "color_parent": "pso_default" + }, { "name": "simulated_annealing_default", "search_method": "simulated_annealing", @@ -201,6 +387,56 @@ ], "display_name": "Simulated Annealing default", "autotuner": "KernelTuner" + }, + { + "name": "simulated_annealing_tuned", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "T", + "value": 1.0 + }, + { + "name": "T_min", + "value": 0.001 + }, + { + "name": "alpha", + "value": 0.995 + }, + { + "name": "maxiter", + "value": 1 + } + ], + "display_name": "Simulated Annealing tuned", + "autotuner": "KernelTuner", + "color_parent": "simulated_annealing_default" + }, + { + "name": "simulated_annealing_inv_tuned", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "T", + "value": 1.0 + }, + { + "name": "T_min", + "value": 0.01 + }, + { + "name": "alpha", + "value": 0.9925 + }, + { + "name": "maxiter", + "value": 3 + } + ], + "display_name": "Simulated Annealing inv. tuned", + "autotuner": "KernelTuner", + "color_parent": "simulated_annealing_default" } ], "statistics_settings": { From 7267df25b42368faee1f5be8fba02932d3450cdb Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 14 Mar 2025 10:24:29 +0100 Subject: [PATCH 102/234] Renamed 'inv.' to untuned --- experiment_files/compare_hypertuners_paper.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 7c5e1d0..224a582 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -78,7 +78,7 @@ { "name": "basinhopping_inv_tuned", "search_method": "basinhopping", - "display_name": "Basinhopping inv. 
tuned", + "display_name": "Basinhopping untuned", "autotuner": "KernelTuner", "search_method_hyperparameters": [ { @@ -150,7 +150,7 @@ "value": "randtobest1exp" } ], - "display_name": "Differential Evolution inv. tuned", + "display_name": "Differential Evolution untuned", "autotuner": "KernelTuner", "color_parent": "diff_evo_default" }, @@ -188,7 +188,7 @@ "value": "Powell" } ], - "display_name": "Dual Annealing inv. tuned", + "display_name": "Dual Annealing untuned", "autotuner": "KernelTuner", "color_parent": "dual_annealing_default" }, @@ -286,7 +286,7 @@ "value": 0.4 } ], - "display_name": "Greedy ILS inv. tuned", + "display_name": "Greedy ILS untuned", "autotuner": "KernelTuner", "color_parent": "greedy_ils_default" }, @@ -360,7 +360,7 @@ "value": 1.0 } ], - "display_name": "PSO inv. tuned", + "display_name": "PSO untuned", "autotuner": "KernelTuner", "color_parent": "pso_default" }, @@ -434,7 +434,7 @@ "value": 3 } ], - "display_name": "Simulated Annealing inv. tuned", + "display_name": "Simulated Annealing untuned", "autotuner": "KernelTuner", "color_parent": "simulated_annealing_default" } From 6fde831ed972953d515a41ad799f0c29aa3df5e7 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 14 Mar 2025 11:56:08 +0100 Subject: [PATCH 103/234] Adjusted the runner to re-run on too many function evaluations --- src/autotuning_methodology/runner.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 64f21f0..584500b 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -232,7 +232,7 @@ def collect_results( objective = results_description.objective_performance_keys[0] objective_higher_is_better = not results_description.minimization - def report_multiple_attempts(rep: int, len_res: int, group_repeats: int, attempt: int): + def report_multiple_attempts(rep: int, len_res: int, group_repeats: int, attempt: int, 
too_many_configs: bool): """If multiple attempts are necessary, report the reason.""" if len_res < 1: print(f"({rep+1}/{group_repeats}) No results found, trying once more...") @@ -242,6 +242,8 @@ def report_multiple_attempts(rep: int, len_res: int, group_repeats: int, attempt ) else: print(f"({rep+1}/{group_repeats}) Only invalid results found, trying once more...") + if too_many_configs: + print(f"Too many configurations found ({len_res} of {group['budget']['max_fevals']=} allowed)") # repeat the run as specified repeated_results = [] @@ -264,9 +266,10 @@ def report_multiple_attempts(rep: int, len_res: int, group_repeats: int, attempt attempt = 0 only_invalid = True len_res: int = -1 - while only_invalid or len_res < min_num_evals: + too_many_configs = False + while only_invalid or len_res < min_num_evals or too_many_configs: if attempt > 0: - report_multiple_attempts(rep, len_res, group["repeats"], attempt) + report_multiple_attempts(rep, len_res, group["repeats"], attempt, too_many_configs) _, results, total_time_ms = tune( input_file, results_description.application_name, @@ -284,6 +287,8 @@ def report_multiple_attempts(rep: int, len_res: int, group_repeats: int, attempt # check if there are only invalid configs in the first min_num_evals, if so, try again temp_res_filtered = list(filter(lambda config: is_valid_config_result(config), results)) only_invalid = len(temp_res_filtered) < 1 + if "max_fevals" in group["budget"]: + too_many_configs = len_res > group["budget"]["max_fevals"] attempt += 1 # register the results repeated_results.append(results) From fd1cf541eeed9d2787c8850e4a1fd86974213a12 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 14 Mar 2025 11:57:25 +0100 Subject: [PATCH 104/234] Registered cutoff times in experiments group, adjusted cutoff margin --- experiment_files/example_visualizations.json | 2 +- src/autotuning_methodology/experiments.py | 29 ++++++++++++++------ 2 files changed, 22 insertions(+), 9 deletions(-) diff --git 
a/experiment_files/example_visualizations.json b/experiment_files/example_visualizations.json index 682147f..ed3991e 100644 --- a/experiment_files/example_visualizations.json +++ b/experiment_files/example_visualizations.json @@ -44,7 +44,7 @@ "strategy_defaults": { "repeats": 100, "minimum_number_of_evaluations": 20, - "cutoff_margin": 1.1, + "cutoff_margin": 0.1, "stochastic": true, "record_data": [ "time", diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 84947b0..d4adf52 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -289,20 +289,31 @@ def calculate_budget(group: dict, statistics_settings: dict, searchspace_stats: Returns: A modified group dictionary. """ - group["budget"] = {} - # set cutoff point + # get cutoff point _, cutoff_point_fevals, cutoff_point_time = searchspace_stats.cutoff_point_fevals_time( statistics_settings["cutoff_percentile"] ) + # get cutoff point start + _, _, cutoff_point_start_time = searchspace_stats.cutoff_point_fevals_time( + statistics_settings["cutoff_percentile_start"] + ) # +10% margin, to make sure cutoff_point is reached by compensating for potential non-valid evaluations # noqa: E501 - cutoff_margin = group.get("cutoff_margin", 1.1) + cutoff_margin = group.get("cutoff_margin", 0.1) + + # register in the group + group["budget"] = {} + group["cutoff_times"] = { + "cutoff_time_start": max(cutoff_point_start_time * (1 - cutoff_margin), 0.0), + "cutoff_time": cutoff_point_time * cutoff_margin, + } # set when to stop if statistics_settings["cutoff_type"] == "time": - group["budget"]["time_limit"] = cutoff_point_time * cutoff_margin + group["budget"]["time_limit"] = group["cutoff_times"]["cutoff_time"] else: - group["budget"]["max_fevals"] = min(int(ceil(cutoff_point_fevals * cutoff_margin)), searchspace_stats.size) + budget = min(int(ceil(cutoff_point_fevals * cutoff_margin)), searchspace_stats.size) + 
group["budget"]["max_fevals"] = budget # write to group's input file as Budget with open(group["input_file"], "r", encoding="utf-8") as fp: @@ -317,9 +328,9 @@ def calculate_budget(group: dict, statistics_settings: dict, searchspace_stats: input_json["Budget"][0]["Type"] = "ConfigurationCount" input_json["Budget"][0]["BudgetValue"] = group["budget"]["max_fevals"] + # write the results and return the adjusted group with open(group["input_file"], "w", encoding="utf-8") as fp: json.dump(input_json, fp, indent=4) - return group @@ -370,10 +381,12 @@ def generate_input_file(group: dict): json.dump(input_json, fp, indent=4) -def get_random_unique_filename(prefix = '', suffix=''): +def get_random_unique_filename(prefix="", suffix=""): """Get a random, unique filename that does not yet exist.""" + def randpath(): return Path(f"{prefix}{randint(1000, 9999)}{suffix}") + path = randpath() while path.exists(): path = randpath() @@ -394,7 +407,7 @@ def generate_experiment_file( assert isinstance(name, str) and len(name) > 0, f"Name for experiment file must be valid, is '{name}'" experiment_file_path = Path(f"./{name.replace(' ', '_')}.json") if generate_unique_file is True: - experiment_file_path = get_random_unique_filename(f"{name.replace(' ', '_')}_", '.json') + experiment_file_path = get_random_unique_filename(f"{name.replace(' ', '_')}_", ".json") if experiment_file_path.exists(): if overwrite_existing_file is False: raise FileExistsError(f"Experiments file '{experiment_file_path}' already exists") From 01aff515984700133d3f2ee7fb87cb318488ac1c Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 14 Mar 2025 11:59:24 +0100 Subject: [PATCH 105/234] Adjusted time cutoff margin calculation --- src/autotuning_methodology/experiments.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index d4adf52..49b3934 100755 --- a/src/autotuning_methodology/experiments.py 
+++ b/src/autotuning_methodology/experiments.py @@ -305,14 +305,14 @@ def calculate_budget(group: dict, statistics_settings: dict, searchspace_stats: group["budget"] = {} group["cutoff_times"] = { "cutoff_time_start": max(cutoff_point_start_time * (1 - cutoff_margin), 0.0), - "cutoff_time": cutoff_point_time * cutoff_margin, + "cutoff_time": cutoff_point_time * (1 + cutoff_margin), } # set when to stop if statistics_settings["cutoff_type"] == "time": group["budget"]["time_limit"] = group["cutoff_times"]["cutoff_time"] else: - budget = min(int(ceil(cutoff_point_fevals * cutoff_margin)), searchspace_stats.size) + budget = min(int(ceil(cutoff_point_fevals * (1 + cutoff_margin))), searchspace_stats.size) group["budget"]["max_fevals"] = budget # write to group's input file as Budget @@ -513,6 +513,7 @@ def execute_experiment(filepath: str, profiling: bool = False): group = calculate_budget( group, experiment["statistics_settings"], searchspace_statistics[group["gpu"]][group["application_name"]] ) + raise ValueError(group) results_description = ResultsDescription( run_folder=experiment_folderpath / "run" / group["name"], From 7b01b82d418314d1e4c04b8f3a0b40aed34ca926 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 14 Mar 2025 12:30:44 +0100 Subject: [PATCH 106/234] Cut out results that are beyond the cutoff time to detect and re-run early if necessary --- src/autotuning_methodology/experiments.py | 1 - src/autotuning_methodology/runner.py | 28 +++++++++++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 49b3934..18bdcc2 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -513,7 +513,6 @@ def execute_experiment(filepath: str, profiling: bool = False): group = calculate_budget( group, experiment["statistics_settings"], searchspace_statistics[group["gpu"]][group["application_name"]] ) - 
raise ValueError(group) results_description = ResultsDescription( run_folder=experiment_folderpath / "run" / group["name"], diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 584500b..4dc3829 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -245,6 +245,22 @@ def report_multiple_attempts(rep: int, len_res: int, group_repeats: int, attempt if too_many_configs: print(f"Too many configurations found ({len_res} of {group['budget']['max_fevals']=} allowed)") + def cumulative_time_taken(results: list) -> list: + """Calculates the cumulative time taken for each of the configurations in results.""" + config_times = [] + cumulative_time_taken = 0 + for config in results: + config_sum = 0 + for key in config["times"]: + if key in searchspace_stats.objective_time_keys: + time = config["times"][key] + if isinstance(time, (list, tuple)): + time = sum(time) + config_sum += time + cumulative_time_taken += config_sum + config_times.append(cumulative_time_taken) + return config_times + # repeat the run as specified repeated_results = [] total_time_results = np.array([]) @@ -283,12 +299,20 @@ def report_multiple_attempts(rep: int, len_res: int, group_repeats: int, attempt results = results["results"] if attempt >= 10: raise RuntimeError(f"Could not find enough results in {attempt} attempts, quiting...") - len_res = len(results) + + # cut out results that are beyond the cutoff time + previous_length = len(results) + time_taken = cumulative_time_taken(results) + cutoff_time = group["cutoff_times"]["cutoff_time"] + results = [res for res, time in zip(results, time_taken) if time <= cutoff_time] + if len(results) < previous_length: + print(f"Cut out {previous_length - len(results)} configurations beyond the cutoff time") + # check if there are only invalid configs in the first min_num_evals, if so, try again temp_res_filtered = list(filter(lambda config: is_valid_config_result(config), results)) 
only_invalid = len(temp_res_filtered) < 1 if "max_fevals" in group["budget"]: - too_many_configs = len_res > group["budget"]["max_fevals"] + too_many_configs = len(results) > group["budget"]["max_fevals"] attempt += 1 # register the results repeated_results.append(results) From d89098f971c0626ec61b88ce99cf4a09a485f857 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 14 Mar 2025 12:33:34 +0100 Subject: [PATCH 107/234] Require at least two valid configurations --- src/autotuning_methodology/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 4dc3829..5933393 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -310,7 +310,7 @@ def cumulative_time_taken(results: list) -> list: # check if there are only invalid configs in the first min_num_evals, if so, try again temp_res_filtered = list(filter(lambda config: is_valid_config_result(config), results)) - only_invalid = len(temp_res_filtered) < 1 + only_invalid = len(temp_res_filtered) < 2 if "max_fevals" in group["budget"]: too_many_configs = len(results) > group["budget"]["max_fevals"] attempt += 1 From 8a4146cbdd238350c4c4e3d2b9f660d26a4c026a Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Fri, 14 Mar 2025 13:06:27 +0100 Subject: [PATCH 108/234] Early check for valid configs without results that are beyond cutoff times --- src/autotuning_methodology/runner.py | 32 +++++++++---------- .../integration/mockfiles/test.json | 4 +-- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 5933393..49b2635 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -232,7 +232,7 @@ def collect_results( objective = results_description.objective_performance_keys[0] objective_higher_is_better = not results_description.minimization - def 
report_multiple_attempts(rep: int, len_res: int, group_repeats: int, attempt: int, too_many_configs: bool): + def report_multiple_attempts(rep: int, len_res: int, group_repeats: int, attempt: int): """If multiple attempts are necessary, report the reason.""" if len_res < 1: print(f"({rep+1}/{group_repeats}) No results found, trying once more...") @@ -242,8 +242,6 @@ def report_multiple_attempts(rep: int, len_res: int, group_repeats: int, attempt ) else: print(f"({rep+1}/{group_repeats}) Only invalid results found, trying once more...") - if too_many_configs: - print(f"Too many configurations found ({len_res} of {group['budget']['max_fevals']=} allowed)") def cumulative_time_taken(results: list) -> list: """Calculates the cumulative time taken for each of the configurations in results.""" @@ -282,10 +280,11 @@ def cumulative_time_taken(results: list) -> list: attempt = 0 only_invalid = True len_res: int = -1 - too_many_configs = False - while only_invalid or len_res < min_num_evals or too_many_configs: + while only_invalid or len_res < min_num_evals: if attempt > 0: - report_multiple_attempts(rep, len_res, group["repeats"], attempt, too_many_configs) + report_multiple_attempts(rep, len_res, group["repeats"], attempt) + if attempt >= 10: + raise RuntimeError(f"Could not find enough results in {attempt} attempts, quiting...") _, results, total_time_ms = tune( input_file, results_description.application_name, @@ -297,22 +296,21 @@ def cumulative_time_taken(results: list) -> list: searchspace_stats, ) results = results["results"] - if attempt >= 10: - raise RuntimeError(f"Could not find enough results in {attempt} attempts, quiting...") - # cut out results that are beyond the cutoff time - previous_length = len(results) + # check without results that are beyond the cutoff times time_taken = cumulative_time_taken(results) cutoff_time = group["cutoff_times"]["cutoff_time"] - results = [res for res, time in zip(results, time_taken) if time <= cutoff_time] - if len(results) 
< previous_length: - print(f"Cut out {previous_length - len(results)} configurations beyond the cutoff time") + cutoff_time_start = group["cutoff_times"]["cutoff_time_start"] + temp_results = [res for res, time in zip(results, time_taken) if cutoff_time_start <= time <= cutoff_time] + # if len(temp_results) < len(results): + # print( + # f"Dropped {len(results) - len(temp_results)} configurations beyond cutoff time {round(cutoff_time, 3)}, {len(temp_results)} left" + # ) # check if there are only invalid configs in the first min_num_evals, if so, try again - temp_res_filtered = list(filter(lambda config: is_valid_config_result(config), results)) - only_invalid = len(temp_res_filtered) < 2 - if "max_fevals" in group["budget"]: - too_many_configs = len(results) > group["budget"]["max_fevals"] + len_res = len(temp_results) + temp_res_filtered = list(filter(lambda config: is_valid_config_result(config), temp_results)) + only_invalid = len(temp_res_filtered) < 2 # there must be at least two valid configurations attempt += 1 # register the results repeated_results.append(results) diff --git a/tests/autotuning_methodology/integration/mockfiles/test.json b/tests/autotuning_methodology/integration/mockfiles/test.json index 3e29115..af03c39 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test.json +++ b/tests/autotuning_methodology/integration/mockfiles/test.json @@ -75,7 +75,7 @@ "y_axis_value_types": [ "gpus" ], - "vmin": -0.75, + "vmin": -2.5, "vmax": 1.2 }, { @@ -87,7 +87,7 @@ "y_axis_value_types": [ "searchspaces" ], - "vmin": -0.75, + "vmin": -2.5, "bins": 10 }, { From 5db75a08fcd959e3b48ee2fb12cd4101e14a62e2 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 14 Mar 2025 21:48:47 +0100 Subject: [PATCH 109/234] Added Genetic Algorithm tuned and untuned --- benchmark_hub | 2 +- .../compare_hypertuners_paper.json | 50 +++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/benchmark_hub b/benchmark_hub index 62c549d..13bdcc9 
160000 --- a/benchmark_hub +++ b/benchmark_hub @@ -1 +1 @@ -Subproject commit 62c549def041efdd19577a83d1ecdc73127c1c32 +Subproject commit 13bdcc9660586f2ecb2e6c9f63909a5138d614f3 diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 224a582..65ac247 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -216,6 +216,56 @@ "display_name": "Genetic Algorithm default", "autotuner": "KernelTuner" }, + { + "name": "genetic_algorithm_tuned", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 30 + }, + { + "name": "maxiter", + "value": 150 + }, + { + "name": "method", + "value": "two_point" + }, + { + "name": "mutation_chance", + "value": 5 + } + ], + "display_name": "Genetic Algorithm tuned", + "autotuner": "KernelTuner", + "color_parent": "genetic_algorithm_default" + }, + { + "name": "genetic_algorithm_inv_tuned", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 30 + }, + { + "name": "maxiter", + "value": 50 + }, + { + "name": "method", + "value": "uniform" + }, + { + "name": "mutation_chance", + "value": 10 + } + ], + "display_name": "Genetic Algorithm untuned", + "autotuner": "KernelTuner", + "color_parent": "genetic_algorithm_default" + }, { "name": "greedy_ils_default", "search_method": "greedy_ils", From 6e203f1cfe18a235b60e6fc5ea93cddc27117762 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sat, 15 Mar 2025 09:04:11 +0100 Subject: [PATCH 110/234] Added MLS, removed Basinhopping, Dual Annealing, and Greedy ILS --- .../compare_hypertuners_paper.json | 140 +++--------------- 1 file changed, 20 insertions(+), 120 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 65ac247..486f8df 100644 --- a/experiment_files/compare_hypertuners_paper.json 
+++ b/experiment_files/compare_hypertuners_paper.json @@ -38,60 +38,10 @@ "stochastic": true, "repeats": 100, "samples": 32, - "minimum_number_of_valid_search_iterations": 8, + "minimum_number_of_valid_search_iterations": 4, "ignore_cache": false }, "search_strategies": [ - { - "name": "basinhopping_default", - "search_method": "basinhopping", - "display_name": "Basinhopping default", - "autotuner": "KernelTuner", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "L-BFGS-B" - }, - { - "name": "T", - "value": 1.0 - } - ] - }, - { - "name": "basinhopping_tuned", - "search_method": "basinhopping", - "display_name": "Basinhopping tuned", - "autotuner": "KernelTuner", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "L-BFGS-B" - }, - { - "name": "T", - "value": 0.1 - } - ], - "color_parent": "basinhopping_default" - }, - { - "name": "basinhopping_inv_tuned", - "search_method": "basinhopping", - "display_name": "Basinhopping untuned", - "autotuner": "KernelTuner", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "BFGS" - }, - { - "name": "T", - "value": 1.0 - } - ], - "color_parent": "basinhopping_default" - }, { "name": "diff_evo_default", "search_method": "diff_evo", @@ -154,44 +104,6 @@ "autotuner": "KernelTuner", "color_parent": "diff_evo_default" }, - { - "name": "dual_annealing_default", - "search_method": "dual_annealing", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "Powell" - } - ], - "display_name": "Dual Annealing default", - "autotuner": "KernelTuner" - }, - { - "name": "dual_annealing_tuned", - "search_method": "dual_annealing", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "trust-constr" - } - ], - "display_name": "Dual Annealing tuned", - "autotuner": "KernelTuner", - "color_parent": "dual_annealing_default" - }, - { - "name": "dual_annealing_inv_tuned", - "search_method": "dual_annealing", - "search_method_hyperparameters": [ - { - 
"name": "method", - "value": "Powell" - } - ], - "display_name": "Dual Annealing untuned", - "autotuner": "KernelTuner", - "color_parent": "dual_annealing_default" - }, { "name": "genetic_algorithm_default", "search_method": "genetic_algorithm", @@ -267,8 +179,8 @@ "color_parent": "genetic_algorithm_default" }, { - "name": "greedy_ils_default", - "search_method": "greedy_ils", + "name": "mls_default", + "search_method": "mls", "search_method_hyperparameters": [ { "name": "neighbor", @@ -276,48 +188,40 @@ }, { "name": "restart", - "value": true - }, - { - "name": "no_improvement", - "value": 50 + "value": false }, { - "name": "random_walk", - "value": 0.3 + "name": "randomize", + "value": true } ], - "display_name": "Greedy ILS default", + "display_name": "MLS default", "autotuner": "KernelTuner" }, { - "name": "greedy_ils_tuned", - "search_method": "greedy_ils", + "name": "mls_tuned", + "search_method": "mls", "search_method_hyperparameters": [ { "name": "neighbor", - "value": "adjacent" + "value": "Hamming" }, { "name": "restart", "value": true }, { - "name": "no_improvement", - "value": 25 - }, - { - "name": "random_walk", - "value": 0.1 + "name": "randomize", + "value": false } ], - "display_name": "Greedy ILS tuned", + "display_name": "MLS tuned", "autotuner": "KernelTuner", - "color_parent": "greedy_ils_default" + "color_parent": "mls_default" }, { - "name": "greedy_ils_inv_tuned", - "search_method": "greedy_ils", + "name": "mls_inv_tuned", + "search_method": "mls", "search_method_hyperparameters": [ { "name": "neighbor", @@ -328,17 +232,13 @@ "value": false }, { - "name": "no_improvement", - "value": 75 - }, - { - "name": "random_walk", - "value": 0.4 + "name": "randomize", + "value": false } ], - "display_name": "Greedy ILS untuned", + "display_name": "MLS inv. 
tuned", "autotuner": "KernelTuner", - "color_parent": "greedy_ils_default" + "color_parent": "mls_default" }, { "name": "pso_default", From 9c6e05296afed5173b44aba767e7baab9b66ca61 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sat, 15 Mar 2025 18:53:18 +0100 Subject: [PATCH 111/234] Useful error on loading numpy file exception, increased number of attempts --- src/autotuning_methodology/caching.py | 6 +++++- src/autotuning_methodology/runner.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/autotuning_methodology/caching.py b/src/autotuning_methodology/caching.py index aeabd46..d08aca5 100755 --- a/src/autotuning_methodology/caching.py +++ b/src/autotuning_methodology/caching.py @@ -169,7 +169,11 @@ def __read_from_file(self) -> list[np.ndarray]: raise ValueError(f"File {full_filepath} does not exist") # load the data and verify the resultsdescription object is the same - data = np.load(full_filepath, allow_pickle=True) + try: + data = np.load(full_filepath, allow_pickle=True) + except Exception as e: + print(f"/!\\ Error loading file: {full_filepath} /!\\") + raise e data_results_description = data["resultsdescription"].item() assert self.is_same_as(data_results_description), "The results description of the results is not the same" diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 49b2635..651f27f 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -283,7 +283,7 @@ def cumulative_time_taken(results: list) -> list: while only_invalid or len_res < min_num_evals: if attempt > 0: report_multiple_attempts(rep, len_res, group["repeats"], attempt) - if attempt >= 10: + if attempt >= 20: raise RuntimeError(f"Could not find enough results in {attempt} attempts, quiting...") _, results, total_time_ms = tune( input_file, From 0d0a2cefbaecd46eb1622c3f687a94ff741ebb24 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sat, 15 Mar 2025 20:00:42 +0100 Subject: 
[PATCH 112/234] Remove substrings from label data --- src/autotuning_methodology/visualize_experiments.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 7637799..2416051 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -29,6 +29,10 @@ # The kernel information per device and device information for visualization purposes marker_variatons = ["v", "s", "*", "1", "2", "d", "P", "X"] +remove_from_gpus_label = "" +remove_from_applications_label = " milo" +remove_from_searchspace_label = " milo" + # total set of objective time keys objective_time_keys_values = ["compilation", "benchmark", "framework", "search_algorithm", "validation"] @@ -444,10 +448,10 @@ def __init__( "cutoff_percentile_start", 0.01 ) label_data = { - "gpus": (list(dict.fromkeys([t[0] for t in strategy_data])), "GPUs"), - "applications": (list(dict.fromkeys([t[1] for t in strategy_data])), "Applications"), + "gpus": (list(dict.fromkeys([t[0].replace(remove_from_gpus_label, "") for t in strategy_data])), "GPUs"), + "applications": (list(dict.fromkeys([t[1].replace(remove_from_applications_label, "") for t in strategy_data])), "Applications"), "searchspaces": ( - list(dict.fromkeys([f"{t[1]} on\n{t[0]}" for t in strategy_data])), + list(dict.fromkeys([f"{t[1]} on\n{t[0]}".replace(remove_from_searchspace_label, "") for t in strategy_data])), "Searchspaces", ), "time": ( From ea708739a5cfa4142b21530e8269e10a231c08ab Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Mon, 17 Mar 2025 11:19:47 +0100 Subject: [PATCH 113/234] Colorbar limits --- src/autotuning_methodology/visualize_experiments.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 
7637799..6e23b47 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -352,8 +352,10 @@ def __init__( style: str = plot["style"] if scope != "search_strategy": continue - if style != "heatmap": - raise NotImplementedError(f"Scope {scope} currently only supports 'heatmap' as a style, not {style}") + if style != "heatmap" and style != "compare_heatmaps": + raise NotImplementedError( + f"Scope {scope} currently only supports 'heatmap' or 'compare_heatmaps' as a style, not {style}" + ) plot_x_value_types: list[str] = plot["x_axis_value_types"] plot_y_value_types: list[str] = plot["y_axis_value_types"] assert len(plot_x_value_types) == 1 @@ -510,6 +512,11 @@ def norm_color_val(v): axs[0].set_yticks(ticks=np.arange(len(y_ticks)), labels=y_ticks) hm = axs[0].imshow(plot_data, vmin=vmin, vmax=vmax, cmap=cmap, interpolation="nearest", aspect="auto") + # set colorbar limits + cbar_min = -2.5 + cbar_max = 1.0 + hm.set_clim(cbar_min, cbar_max) # This does not affect the colormap, only the bar + # plot the colorbar cbar = fig.colorbar(hm) cbar.set_label("Performance relative to baseline (0.0) and optimum (1.0)") From 53dd03409e68ffde117cc0b345767aaa61322588 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Mon, 17 Mar 2025 11:32:05 +0100 Subject: [PATCH 114/234] Added setting colorbar limits --- .../visualize_experiments.py | 38 +++++++++++++++---- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 33930f2..52fb3ed 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -369,6 +369,9 @@ def __init__( bins = plot.get("bins", 10) vmin = plot.get("vmin", -10.0) vmax = plot.get("vmax", 1.0) + cmin = plot.get("cmin", -10.0) + cmax = plot.get("cmax", 1.0) + cnum = plot.get("cnum", 5) if vmin != -10.0: 
warnings.warn( f"Careful: VMin has been changed from -10.0 to {vmin}. This breaks visual comparison compatiblity with plots that do not have the same VMin." @@ -377,6 +380,14 @@ def __init__( warnings.warn( f"Careful: VMax has been changed from 1.0 to {vmax}. This breaks visual comparison compatiblity with plots that do not have the same VMax." ) + if cmin < vmin: + raise ValueError( + f"Colorbar minimum can't be lower than the minimum value of the heatmap: {cmin} < {vmin}" + ) + if cmax > vmax: + raise ValueError( + f"Colorbar maximum can't be higher than the maximum value of the heatmap: {cmax} > {vmax}" + ) # collect and plot the data for each search strategy data_collected: dict[str, list[tuple]] = defaultdict(list) @@ -450,10 +461,20 @@ def __init__( "cutoff_percentile_start", 0.01 ) label_data = { - "gpus": (list(dict.fromkeys([t[0].replace(remove_from_gpus_label, "") for t in strategy_data])), "GPUs"), - "applications": (list(dict.fromkeys([t[1].replace(remove_from_applications_label, "") for t in strategy_data])), "Applications"), + "gpus": ( + list(dict.fromkeys([t[0].replace(remove_from_gpus_label, "") for t in strategy_data])), + "GPUs", + ), + "applications": ( + list(dict.fromkeys([t[1].replace(remove_from_applications_label, "") for t in strategy_data])), + "Applications", + ), "searchspaces": ( - list(dict.fromkeys([f"{t[1]} on\n{t[0]}".replace(remove_from_searchspace_label, "") for t in strategy_data])), + list( + dict.fromkeys( + [f"{t[1]} on\n{t[0]}".replace(remove_from_searchspace_label, "") for t in strategy_data] + ) + ), "Searchspaces", ), "time": ( @@ -516,13 +537,16 @@ def norm_color_val(v): axs[0].set_yticks(ticks=np.arange(len(y_ticks)), labels=y_ticks) hm = axs[0].imshow(plot_data, vmin=vmin, vmax=vmax, cmap=cmap, interpolation="nearest", aspect="auto") - # set colorbar limits - cbar_min = -2.5 - cbar_max = 1.0 - hm.set_clim(cbar_min, cbar_max) # This does not affect the colormap, only the bar + # hm.set_clim(cmin, cmax) # This does not 
affect the colormap, only the bar + # cbar = plt.colorbar(hm) + # cbar.set_clim(cmin, cmax) # This does not affect the colormap, only the bar + # cbar.ax.set_ylim(cmin, cmax) # Adjust visible colorbar limits # plot the colorbar cbar = fig.colorbar(hm) + if cmin != vmin or cmax != vmax: + cbar.set_ticks(np.linspace(cmin, cmax, num=cnum)) # set colorbar limits + cbar.ax.set_ylim(cmin, cmax) # adjust visible colorbar limits cbar.set_label("Performance relative to baseline (0.0) and optimum (1.0)") # keep only non-overlapping ticks From b7dcd69aa1128f7fc030196e91e5b7cbcc30f0ab Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Mon, 17 Mar 2025 11:35:00 +0100 Subject: [PATCH 115/234] Minor improvements to warnings and removed redundant code --- .../visualize_experiments.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 52fb3ed..8e29b7d 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -367,18 +367,18 @@ def __init__( x_type = plot_x_value_types[0] y_type = plot_y_value_types[0] bins = plot.get("bins", 10) - vmin = plot.get("vmin", -10.0) - vmax = plot.get("vmax", 1.0) - cmin = plot.get("cmin", -10.0) - cmax = plot.get("cmax", 1.0) - cnum = plot.get("cnum", 5) + vmin = plot.get("vmin", -10.0) # color range lower limit + vmax = plot.get("vmax", 1.0) # color range upper limit + cmin = plot.get("cmin", -10.0) # colorbar lower limit + cmax = plot.get("cmax", 1.0) # colorbar upper limit + cnum = plot.get("cnum", 5) # number of ticks on the colorbar if vmin != -10.0: warnings.warn( - f"Careful: VMin has been changed from -10.0 to {vmin}. This breaks visual comparison compatiblity with plots that do not have the same VMin." + f"Careful: VMin has been changed from -10.0 to {vmin}. 
This breaks visual comparison compatiblity with plots that do not have the same VMin. Maybe use cmin instead?." ) if vmax != 1.0: warnings.warn( - f"Careful: VMax has been changed from 1.0 to {vmax}. This breaks visual comparison compatiblity with plots that do not have the same VMax." + f"Careful: VMax has been changed from 1.0 to {vmax}. This breaks visual comparison compatiblity with plots that do not have the same VMax. Maybe use cmax instead?" ) if cmin < vmin: raise ValueError( @@ -537,11 +537,6 @@ def norm_color_val(v): axs[0].set_yticks(ticks=np.arange(len(y_ticks)), labels=y_ticks) hm = axs[0].imshow(plot_data, vmin=vmin, vmax=vmax, cmap=cmap, interpolation="nearest", aspect="auto") - # hm.set_clim(cmin, cmax) # This does not affect the colormap, only the bar - # cbar = plt.colorbar(hm) - # cbar.set_clim(cmin, cmax) # This does not affect the colormap, only the bar - # cbar.ax.set_ylim(cmin, cmax) # Adjust visible colorbar limits - # plot the colorbar cbar = fig.colorbar(hm) if cmin != vmin or cmax != vmax: From f7320d1c67a5e0cf106dca645f058fc83ab90f28 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 17 Mar 2025 16:35:26 +0100 Subject: [PATCH 116/234] Added warning for broken numpy files --- src/autotuning_methodology/caching.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/autotuning_methodology/caching.py b/src/autotuning_methodology/caching.py index d08aca5..bbb7d45 100755 --- a/src/autotuning_methodology/caching.py +++ b/src/autotuning_methodology/caching.py @@ -180,7 +180,11 @@ def __read_from_file(self) -> list[np.ndarray]: # get the numpy arrays numpy_arrays = list() for numpy_array_key in self.numpy_arrays_keys: - numpy_arrays.append(data[numpy_array_key]) + try: + numpy_arrays.append(data[numpy_array_key]) + except Exception as e: + print(f"/!\\ Error adding numpy array {numpy_array_key} from file: {full_filepath} /!\\") + raise e return numpy_arrays def get_results(self) -> Results: From 
9c0ca13b750d040c81b5841564e4f32cbf52015b Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 18 Mar 2025 16:26:26 +0100 Subject: [PATCH 117/234] Set compare_hypertuners_paper experiments file as in paper --- .../compare_hypertuners_paper.json | 245 +++--------------- 1 file changed, 37 insertions(+), 208 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 486f8df..dd456d9 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -36,97 +36,36 @@ "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" }, "stochastic": true, - "repeats": 100, + "repeats": 50, "samples": 32, "minimum_number_of_valid_search_iterations": 4, "ignore_cache": false }, "search_strategies": [ { - "name": "diff_evo_default", - "search_method": "diff_evo", + "name": "dual_annealing_tuned", + "search_method": "dual_annealing", "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 20 - }, - { - "name": "maxiter", - "value": 100 - }, { "name": "method", - "value": "best1bin" + "value": "COBYLA" } ], - "display_name": "Differential Evolution default", + "display_name": "Dual Annealing tuned", "autotuner": "KernelTuner" }, { - "name": "diff_evo_tuned", - "search_method": "diff_evo", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 10 - }, - { - "name": "maxiter", - "value": 50 - }, - { - "name": "method", - "value": "best2exp" - } - ], - "display_name": "Differential Evolution tuned", - "autotuner": "KernelTuner", - "color_parent": "diff_evo_default" - }, - { - "name": "diff_evo_inv_tuned", - "search_method": "diff_evo", + "name": "dual_annealing_inv_tuned", + "search_method": "dual_annealing", "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 10 - }, - { - "name": "maxiter", - "value": 150 - }, { "name": "method", - "value": "randtobest1exp" + "value": "Nelder-Mead" } ], - 
"display_name": "Differential Evolution untuned", + "display_name": "Dual Annealing untuned", "autotuner": "KernelTuner", - "color_parent": "diff_evo_default" - }, - { - "name": "genetic_algorithm_default", - "search_method": "genetic_algorithm", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 20 - }, - { - "name": "maxiter", - "value": 100 - }, - { - "name": "method", - "value": "uniform" - }, - { - "name": "mutation_chance", - "value": 10 - } - ], - "display_name": "Genetic Algorithm default", - "autotuner": "KernelTuner" + "color_parent": "dual_annealing_tuned" }, { "name": "genetic_algorithm_tuned", @@ -138,20 +77,19 @@ }, { "name": "maxiter", - "value": 150 + "value": 50 }, { "name": "method", - "value": "two_point" + "value": "uniform" }, { "name": "mutation_chance", - "value": 5 + "value": 20 } ], "display_name": "Genetic Algorithm tuned", - "autotuner": "KernelTuner", - "color_parent": "genetic_algorithm_default" + "autotuner": "KernelTuner" }, { "name": "genetic_algorithm_inv_tuned", @@ -159,7 +97,7 @@ "search_method_hyperparameters": [ { "name": "popsize", - "value": 30 + "value": 10 }, { "name": "maxiter", @@ -167,102 +105,16 @@ }, { "name": "method", - "value": "uniform" + "value": "two_point" }, { "name": "mutation_chance", - "value": 10 + "value": 20 } ], "display_name": "Genetic Algorithm untuned", "autotuner": "KernelTuner", - "color_parent": "genetic_algorithm_default" - }, - { - "name": "mls_default", - "search_method": "mls", - "search_method_hyperparameters": [ - { - "name": "neighbor", - "value": "Hamming" - }, - { - "name": "restart", - "value": false - }, - { - "name": "randomize", - "value": true - } - ], - "display_name": "MLS default", - "autotuner": "KernelTuner" - }, - { - "name": "mls_tuned", - "search_method": "mls", - "search_method_hyperparameters": [ - { - "name": "neighbor", - "value": "Hamming" - }, - { - "name": "restart", - "value": true - }, - { - "name": "randomize", - "value": false - } - ], - 
"display_name": "MLS tuned", - "autotuner": "KernelTuner", - "color_parent": "mls_default" - }, - { - "name": "mls_inv_tuned", - "search_method": "mls", - "search_method_hyperparameters": [ - { - "name": "neighbor", - "value": "adjacent" - }, - { - "name": "restart", - "value": false - }, - { - "name": "randomize", - "value": false - } - ], - "display_name": "MLS inv. tuned", - "autotuner": "KernelTuner", - "color_parent": "mls_default" - }, - { - "name": "pso_default", - "search_method": "pso", - "display_name": "PSO default", - "autotuner": "KernelTuner", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 20 - }, - { - "name": "maxiter", - "value": 100 - }, - { - "name": "c1", - "value": 2.0 - }, - { - "name": "c2", - "value": 1.0 - } - ] + "color_parent": "genetic_algorithm_tuned" }, { "name": "pso_tuned", @@ -270,7 +122,7 @@ "search_method_hyperparameters": [ { "name": "popsize", - "value": 30 + "value": 20 }, { "name": "maxiter", @@ -282,12 +134,11 @@ }, { "name": "c2", - "value": 0.5 + "value": 1.5 } ], "display_name": "PSO tuned", - "autotuner": "KernelTuner", - "color_parent": "pso_default" + "autotuner": "KernelTuner" }, { "name": "pso_inv_tuned", @@ -299,7 +150,7 @@ }, { "name": "maxiter", - "value": 100 + "value": 150 }, { "name": "c1", @@ -307,36 +158,12 @@ }, { "name": "c2", - "value": 1.0 + "value": 1.5 } ], "display_name": "PSO untuned", "autotuner": "KernelTuner", - "color_parent": "pso_default" - }, - { - "name": "simulated_annealing_default", - "search_method": "simulated_annealing", - "search_method_hyperparameters": [ - { - "name": "T", - "value": 1.0 - }, - { - "name": "T_min", - "value": 0.001 - }, - { - "name": "alpha", - "value": 0.995 - }, - { - "name": "maxiter", - "value": 1 - } - ], - "display_name": "Simulated Annealing default", - "autotuner": "KernelTuner" + "color_parent": "pso_tuned" }, { "name": "simulated_annealing_tuned", @@ -344,24 +171,23 @@ "search_method_hyperparameters": [ { "name": "T", - "value": 1.0 
+ "value": 0.5 }, { "name": "T_min", - "value": 0.001 + "value": 0.0001 }, { "name": "alpha", - "value": 0.995 + "value": 0.9975 }, { "name": "maxiter", - "value": 1 + "value": 2 } ], "display_name": "Simulated Annealing tuned", - "autotuner": "KernelTuner", - "color_parent": "simulated_annealing_default" + "autotuner": "KernelTuner" }, { "name": "simulated_annealing_inv_tuned", @@ -373,20 +199,20 @@ }, { "name": "T_min", - "value": 0.01 + "value": 0.0001 }, { "name": "alpha", - "value": 0.9925 + "value": 0.9975 }, { "name": "maxiter", - "value": 3 + "value": 2 } ], "display_name": "Simulated Annealing untuned", "autotuner": "KernelTuner", - "color_parent": "simulated_annealing_default" + "color_parent": "simulated_annealing_tuned" } ], "statistics_settings": { @@ -433,7 +259,10 @@ ], "y_axis_value_types": [ "gpus" - ] + ], + "cmin": -2.5, + "include_y_labels": true, + "include_colorbar": true }, { "scope": "search_strategy", From 74b746f1be3f65a697d70d73d5321ac85f25baed Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 18 Mar 2025 16:27:13 +0100 Subject: [PATCH 118/234] Added new and improved plotting options for heatmaps --- .../visualize_experiments.py | 301 +++++++++++------- 1 file changed, 193 insertions(+), 108 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 8e29b7d..76ba03a 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -372,6 +372,8 @@ def __init__( cmin = plot.get("cmin", -10.0) # colorbar lower limit cmax = plot.get("cmax", 1.0) # colorbar upper limit cnum = plot.get("cnum", 5) # number of ticks on the colorbar + include_y_labels = plot.get("include_y_labels", True) + include_colorbar = plot.get("include_colorbar", True) if vmin != -10.0: warnings.warn( f"Careful: VMin has been changed from -10.0 to {vmin}. 
This breaks visual comparison compatiblity with plots that do not have the same VMin. Maybe use cmin instead?." @@ -389,6 +391,22 @@ def __init__( f"Colorbar maximum can't be higher than the maximum value of the heatmap: {cmax} > {vmax}" ) + # set the colormap + def norm_color_val(v): + """Normalize a color value to fit in the 0-1 range.""" + return (v - vmin) / (vmax - vmin) + + cmap = LinearSegmentedColormap.from_list( + "my_colormap", + [ + (norm_color_val(-10.0), "black"), + (norm_color_val(-4.0), "red"), + (norm_color_val(-1.0), "orange"), + (norm_color_val(0.0), "yellow"), + (norm_color_val(1.0), "green"), + ], + ) + # collect and plot the data for each search strategy data_collected: dict[str, list[tuple]] = defaultdict(list) for strategy in self.strategies: @@ -452,56 +470,122 @@ def __init__( data_collected[strategy_name].append( tuple([gpu_display_name, application_display_name, score, score_binned]) ) - - # get the performance per selected type in an array - strategy_data = data_collected[strategy_name] - plot_data = np.stack(np.array([t[2] for t in strategy_data])) - cutoff_percentile: float = self.experiment["statistics_settings"].get("cutoff_percentile", 1) - cutoff_percentile_start: float = self.experiment["statistics_settings"].get( - "cutoff_percentile_start", 0.01 - ) - label_data = { - "gpus": ( - list(dict.fromkeys([t[0].replace(remove_from_gpus_label, "") for t in strategy_data])), - "GPUs", - ), - "applications": ( - list(dict.fromkeys([t[1].replace(remove_from_applications_label, "") for t in strategy_data])), - "Applications", - ), - "searchspaces": ( - list( - dict.fromkeys( - [f"{t[1]} on\n{t[0]}".replace(remove_from_searchspace_label, "") for t in strategy_data] - ) - ), - "Searchspaces", - ), - "time": ( - np.round(np.linspace(0.0, 1.0, bins), 2), - f"Fraction of time between {cutoff_percentile_start*100}% and {cutoff_percentile*100}%", - ), - } - x_ticks = label_data[x_type][0] - y_ticks = label_data[y_type][0] - if (x_type == "time" 
and y_type == "searchspaces") or (x_type == "searchspaces" and y_type == "time"): - plot_data: np.ndarray = np.stack(np.array([t[3] for t in strategy_data])) - if x_type == "searchspaces": - plot_data = plot_data.transpose() - elif (x_type == "gpus" and y_type == "applications") or (y_type == "gpus" and x_type == "applications"): - plot_data = np.reshape(plot_data, (len(label_data["gpus"][0]), len(label_data["applications"][0]))) - if x_type == "gpus": - plot_data = np.transpose(plot_data) - else: - raise NotImplementedError( - f"Heatmap has not yet been implemented for {x_type}, {y_type}. Submit an issue to request it." + if style == "heatmap": + for strategy in self.strategies: + strategy_name = strategy["name"] + strategy_displayname = strategy["display_name"] + strategy_data = data_collected[strategy_name] + + # get the performance per selected type in an array + plot_data = np.stack(np.array([t[2] for t in strategy_data])) + cutoff_percentile: float = self.experiment["statistics_settings"].get("cutoff_percentile", 1.0) + cutoff_percentile_start: float = self.experiment["statistics_settings"].get( + "cutoff_percentile_start", 0.01 ) + label_data = { + "gpus": ( + list(dict.fromkeys([t[0].replace(remove_from_gpus_label, "") for t in strategy_data])), + "GPUs", + ), + "applications": ( + list(dict.fromkeys([t[1].replace(remove_from_applications_label, "") for t in strategy_data])), + "Applications", + ), + "searchspaces": ( + list( + dict.fromkeys( + [f"{t[1]} on\n{t[0]}".replace(remove_from_searchspace_label, "") for t in strategy_data] + ) + ), + "Searchspaces", + ), + "time": ( + np.round(np.linspace(0.0, 1.0, bins), 2), + f"Fraction of time between {cutoff_percentile_start*100}% and {cutoff_percentile*100}%", + ), + } + x_ticks = label_data[x_type][0] + y_ticks = label_data[y_type][0] + if (x_type == "time" and y_type == "searchspaces") or (x_type == "searchspaces" and y_type == "time"): + plot_data: np.ndarray = np.stack(np.array([t[3] for t in 
strategy_data])) + if x_type == "searchspaces": + plot_data = plot_data.transpose() + elif (x_type == "gpus" and y_type == "applications") or (y_type == "gpus" and x_type == "applications"): + plot_data = np.reshape(plot_data, (len(label_data["gpus"][0]), len(label_data["applications"][0]))) + if x_type == "gpus": + plot_data = np.transpose(plot_data) + else: + raise NotImplementedError( + f"Heatmap has not yet been implemented for {x_type}, {y_type}. Submit an issue to request it." + ) - # validate the data - outside_range = np.where(np.logical_or(plot_data < vmin, plot_data > vmax)) - assert ( - len(outside_range[0]) == 0 and len(outside_range[1]) == 0 - ), f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range})" + # validate the data + outside_range = np.where(np.logical_or(plot_data < vmin, plot_data > vmax)) + assert ( + len(outside_range[0]) == 0 and len(outside_range[1]) == 0 + ), f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range})" + + # set up the plot + fig, axs = plt.subplots( + ncols=1, figsize=(7, 4), dpi=300 + ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. 
+ if not hasattr(axs, "__len__"): + axs = [axs] + title = f"Performance of {strategy_displayname} over {'+'.join(plot_x_value_types)},{'+'.join(plot_y_value_types)}" + fig.canvas.manager.set_window_title(title) + if not save_figs: + fig.suptitle(title) + + # plot the heatmap + axs[0].set_xlabel(label_data[x_type][1]) + axs[0].set_xticks(ticks=np.arange(len(x_ticks)), labels=x_ticks, rotation=0) + if include_y_labels: + axs[0].set_ylabel(label_data[y_type][1]) + axs[0].set_yticks(ticks=np.arange(len(y_ticks)), labels=y_ticks) + else: + axs[0].set_yticks(ticks=np.arange(len(y_ticks))) + axs[0].tick_params(labelleft=False) + hm = axs[0].imshow(plot_data, vmin=vmin, vmax=vmax, cmap=cmap, interpolation="nearest", aspect="auto") + + # plot the colorbar + if not include_colorbar: + cbar = fig.colorbar(hm) + if cmin != vmin or cmax != vmax: + cbar.set_ticks(np.linspace(cmin, cmax, num=cnum)) # set colorbar limits + cbar.ax.set_ylim(cmin, cmax) # adjust visible colorbar limits + # cbar.set_label("Performance relative to baseline (0.0) and optimum (1.0)") + cbar.set_label("Performance score") + + # keep only non-overlapping ticks + max_ticks = 15 + if len(x_ticks) > max_ticks: + indices = np.linspace(0, len(x_ticks) - 1, max_ticks).round() + hide_tick = np.isin(np.arange(len(x_ticks)), indices, invert=True, assume_unique=True) + for i, t in enumerate(axs[0].xaxis.get_ticklabels()): + if hide_tick[i]: + t.set_visible(False) + if len(y_ticks) > max_ticks: + indices = np.linspace(0, len(y_ticks) - 1, max_ticks).round() + hide_tick = np.isin(np.arange(len(y_ticks)), indices, invert=True, assume_unique=True) + for i, t in enumerate(axs[0].yaxis.get_ticklabels()): + if hide_tick[i]: + t.set_visible(False) + + # finalize the figure and save or display it + fig.tight_layout() + if save_figs: + filename_path = ( + Path(self.plot_filename_prefix) + / f"{strategy_name}_heatmap_{'_'.join(plot_x_value_types)}_{'_'.join(plot_y_value_types)}" + ) + fig.savefig(filename_path, dpi=300) + 
print(f"Figure saved to {filename_path}") + else: + plt.show() + elif style == "compare_heatmaps": + comparisons = plot["comparison"] + + raise NotImplementedError("Still a work in progress") # set up the plot fig, axs = plt.subplots( @@ -509,67 +593,68 @@ def __init__( ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. if not hasattr(axs, "__len__"): axs = [axs] - title = f"Performance of {strategy_displayname} over {'+'.join(plot_x_value_types)},{'+'.join(plot_y_value_types)}" - fig.canvas.manager.set_window_title(title) - if not save_figs: - fig.suptitle(title) - - # set the colormap - def norm_color_val(v): - """Normalize a color value to fit in the 0-1 range.""" - return (v - vmin) / (vmax - vmin) - - cmap = LinearSegmentedColormap.from_list( - "my_colormap", - [ - (norm_color_val(-10.0), "black"), - (norm_color_val(-4.0), "red"), - (norm_color_val(-1.0), "orange"), - (norm_color_val(0.0), "yellow"), - (norm_color_val(1.0), "green"), - ], - ) - - # plot the heatmap - axs[0].set_xlabel(label_data[x_type][1]) - axs[0].set_ylabel(label_data[y_type][1]) - axs[0].set_xticks(ticks=np.arange(len(x_ticks)), labels=x_ticks, rotation=45) - axs[0].set_yticks(ticks=np.arange(len(y_ticks)), labels=y_ticks) - hm = axs[0].imshow(plot_data, vmin=vmin, vmax=vmax, cmap=cmap, interpolation="nearest", aspect="auto") - - # plot the colorbar - cbar = fig.colorbar(hm) - if cmin != vmin or cmax != vmax: - cbar.set_ticks(np.linspace(cmin, cmax, num=cnum)) # set colorbar limits - cbar.ax.set_ylim(cmin, cmax) # adjust visible colorbar limits - cbar.set_label("Performance relative to baseline (0.0) and optimum (1.0)") - - # keep only non-overlapping ticks - max_ticks = 15 - if len(x_ticks) > max_ticks: - indices = np.linspace(0, len(x_ticks) - 1, max_ticks).round() - hide_tick = np.isin(np.arange(len(x_ticks)), indices, invert=True, assume_unique=True) - for i, t in enumerate(axs[0].xaxis.get_ticklabels()): - if hide_tick[i]: - t.set_visible(False) - if 
len(y_ticks) > max_ticks: - indices = np.linspace(0, len(y_ticks) - 1, max_ticks).round() - hide_tick = np.isin(np.arange(len(y_ticks)), indices, invert=True, assume_unique=True) - for i, t in enumerate(axs[0].yaxis.get_ticklabels()): - if hide_tick[i]: - t.set_visible(False) - - # finalize the figure and save or display it - fig.tight_layout() - if save_figs: - filename_path = ( - Path(self.plot_filename_prefix) - / f"{strategy_name}_heatmap_{'_'.join(plot_x_value_types)}_{'_'.join(plot_y_value_types)}" + # title = f"Performance of {strategy_displayname} over {'+'.join(plot_x_value_types)},{'+'.join(plot_y_value_types)}" + # fig.canvas.manager.set_window_title(title) + # if not save_figs: + # fig.suptitle(title) + + for comparison in comparisons: + strategy_names = comparisons["strategies"] + strategies = [s for s in self.strategies if s["name"]] + # for strategy in strategies: + strategy_displayname = strategy["display_name"] + strategy_data = data_collected[strategy_name] + + # get the performance per selected type in an array + plot_data = np.stack(np.array([t[2] for t in strategy_data])) + cutoff_percentile: float = self.experiment["statistics_settings"].get("cutoff_percentile", 1) + cutoff_percentile_start: float = self.experiment["statistics_settings"].get( + "cutoff_percentile_start", 0.01 ) - fig.savefig(filename_path, dpi=300) - print(f"Figure saved to {filename_path}") - else: - plt.show() + label_data = { + "gpus": ( + list(dict.fromkeys([t[0].replace(remove_from_gpus_label, "") for t in strategy_data])), + "GPUs", + ), + "applications": ( + list(dict.fromkeys([t[1].replace(remove_from_applications_label, "") for t in strategy_data])), + "Applications", + ), + "searchspaces": ( + list( + dict.fromkeys( + [f"{t[1]} on\n{t[0]}".replace(remove_from_searchspace_label, "") for t in strategy_data] + ) + ), + "Searchspaces", + ), + "time": ( + np.round(np.linspace(0.0, 1.0, bins), 2), + f"Fraction of time between {cutoff_percentile_start*100}% and 
{cutoff_percentile*100}%", + ), + } + x_ticks = label_data[x_type][0] + y_ticks = label_data[y_type][0] + if (x_type == "time" and y_type == "searchspaces") or (x_type == "searchspaces" and y_type == "time"): + plot_data: np.ndarray = np.stack(np.array([t[3] for t in strategy_data])) + if x_type == "searchspaces": + plot_data = plot_data.transpose() + elif (x_type == "gpus" and y_type == "applications") or (y_type == "gpus" and x_type == "applications"): + plot_data = np.reshape(plot_data, (len(label_data["gpus"][0]), len(label_data["applications"][0]))) + if x_type == "gpus": + plot_data = np.transpose(plot_data) + else: + raise NotImplementedError( + f"Heatmap has not yet been implemented for {x_type}, {y_type}. Submit an issue to request it." + ) + + # validate the data + outside_range = np.where(np.logical_or(plot_data < vmin, plot_data > vmax)) + assert ( + len(outside_range[0]) == 0 and len(outside_range[1]) == 0 + ), f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range})" + else: + raise NotImplementedError(f"Invalid {style=}") # plot the aggregated searchspaces for plot in plots: From 1410ac1b4ddec91e924b795c13e2deac32ac0a68 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 18 Mar 2025 16:42:15 +0100 Subject: [PATCH 119/234] Updated benchmark_hub submodule --- benchmark_hub | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark_hub b/benchmark_hub index 13bdcc9..88d8fe5 160000 --- a/benchmark_hub +++ b/benchmark_hub @@ -1 +1 @@ -Subproject commit 13bdcc9660586f2ecb2e6c9f63909a5138d614f3 +Subproject commit 88d8fe5e8bbd317abb25155ac4490f1ad19be37f From 9fc73835126790386e096c8355edc377b1936b82 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Tue, 18 Mar 2025 16:51:04 +0100 Subject: [PATCH 120/234] Added experiments file and supporting files for comparing meta-strategies on hyperparameter problems --- .../dummy.cu | 0 .../metatuning_dual_annealing.json | 50 +++++++++++ 
.../metatuning_genetic_algorithm.json | 68 +++++++++++++++ .../metatuning_pso.json | 74 ++++++++++++++++ .../metatuning_simulated_annealing.json | 68 +++++++++++++++ .../compare_hypertuners_metastrategy.json | 87 +++++++++++++++++++ 6 files changed, 347 insertions(+) create mode 100644 cached_data_used/hyperparametertuning_metastrategies/dummy.cu create mode 100644 cached_data_used/hyperparametertuning_metastrategies/metatuning_dual_annealing.json create mode 100644 cached_data_used/hyperparametertuning_metastrategies/metatuning_genetic_algorithm.json create mode 100644 cached_data_used/hyperparametertuning_metastrategies/metatuning_pso.json create mode 100644 cached_data_used/hyperparametertuning_metastrategies/metatuning_simulated_annealing.json create mode 100644 experiment_files/compare_hypertuners_metastrategy.json diff --git a/cached_data_used/hyperparametertuning_metastrategies/dummy.cu b/cached_data_used/hyperparametertuning_metastrategies/dummy.cu new file mode 100644 index 0000000..e69de29 diff --git a/cached_data_used/hyperparametertuning_metastrategies/metatuning_dual_annealing.json b/cached_data_used/hyperparametertuning_metastrategies/metatuning_dual_annealing.json new file mode 100644 index 0000000..79006d0 --- /dev/null +++ b/cached_data_used/hyperparametertuning_metastrategies/metatuning_dual_annealing.json @@ -0,0 +1,50 @@ +{ + "General": { + "BenchmarkName": "hyperparamtuning_dual_annealing", + "OutputFormat": "JSON" + }, + "ConfigurationSpace": { + "TuningParameters": [ + { + "Name": "method", + "Type": "string", + "Values": "['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr']", + "Default": "COBYLA" + } + ], + "Conditions": [] + }, + "KernelSpecification": { + "Language": "Hypertuner", + "BenchmarkName": "hyperparamtuning_dual_annealing", + "KernelName": "hyperparamtuning_dual_annealing", + "KernelFile": "dummy.cu", + "GlobalSizeType": "CUDA", + "LocalSize": { + "X": "block_size_x", + "Y": "block_size_y", + 
"Z": "1" + }, + "GlobalSize": { + "X": "(262144 // block_size_x) // tile_size_x", + "Y": "(262144 // block_size_y) // tile_size_y", + "Z": "1" + }, + "GridDivX": [ + "block_size_x", + "tile_size_x" + ], + "GridDivY": [ + "block_size_y", + "tile_size_y" + ], + "ProblemSize": [ + 25000, + 2048, + 1 + ], + "SharedMemory": 0, + "Stream": null, + "Arguments": [] + } +} \ No newline at end of file diff --git a/cached_data_used/hyperparametertuning_metastrategies/metatuning_genetic_algorithm.json b/cached_data_used/hyperparametertuning_metastrategies/metatuning_genetic_algorithm.json new file mode 100644 index 0000000..9cf4adf --- /dev/null +++ b/cached_data_used/hyperparametertuning_metastrategies/metatuning_genetic_algorithm.json @@ -0,0 +1,68 @@ +{ + "General": { + "BenchmarkName": "hyperparamtuning_genetic_algorithm", + "OutputFormat": "JSON" + }, + "ConfigurationSpace": { + "TuningParameters": [ + { + "Name": "method", + "Type": "string", + "Values": "['single_point', 'two_point', 'uniform', 'disruptive_uniform']", + "Default": "uniform" + }, + { + "Name": "popsize", + "Type": "int", + "Values": "[10, 20, 30]", + "Default": 20 + }, + { + "Name": "maxiter", + "Type": "int", + "Values": "[50, 100, 150]", + "Default": 100 + }, + { + "Name": "mutation_chance", + "Type": "int", + "Values": "[5, 10, 20]", + "Default": 10 + } + ], + "Conditions": [] + }, + "KernelSpecification": { + "Language": "Hypertuner", + "BenchmarkName": "hyperparamtuning_genetic_algorithm", + "KernelName": "hyperparamtuning_genetic_algorithm", + "KernelFile": "dummy.cu", + "GlobalSizeType": "CUDA", + "LocalSize": { + "X": "block_size_x", + "Y": "block_size_y", + "Z": "1" + }, + "GlobalSize": { + "X": "(262144 // block_size_x) // tile_size_x", + "Y": "(262144 // block_size_y) // tile_size_y", + "Z": "1" + }, + "GridDivX": [ + "block_size_x", + "tile_size_x" + ], + "GridDivY": [ + "block_size_y", + "tile_size_y" + ], + "ProblemSize": [ + 25000, + 2048, + 1 + ], + "SharedMemory": 0, + "Stream": null, + 
"Arguments": [] + } +} \ No newline at end of file diff --git a/cached_data_used/hyperparametertuning_metastrategies/metatuning_pso.json b/cached_data_used/hyperparametertuning_metastrategies/metatuning_pso.json new file mode 100644 index 0000000..19889bd --- /dev/null +++ b/cached_data_used/hyperparametertuning_metastrategies/metatuning_pso.json @@ -0,0 +1,74 @@ +{ + "General": { + "BenchmarkName": "hyperparamtuning_psog", + "OutputFormat": "JSON" + }, + "ConfigurationSpace": { + "TuningParameters": [ + { + "Name": "popsize", + "Type": "int", + "Values": "[10, 20, 30]", + "Default": 20 + }, + { + "Name": "popsize", + "Type": "int", + "Values": "[10, 20, 30]", + "Default": 20 + }, + { + "Name": "maxiter", + "Type": "int", + "Values": "[50, 100, 150]", + "Default": 100 + }, + { + "Name": "c1", + "Type": "float", + "Values": "[1.0, 2.0, 3.0]", + "Default": 2.0 + }, + { + "Name": "c2", + "Type": "float", + "Values": "[0.5, 1.0, 1.5]", + "Default": 1.0 + } + ], + "Conditions": [] + }, + "KernelSpecification": { + "Language": "Hypertuner", + "BenchmarkName": "hyperparamtuning_pso", + "KernelName": "hyperparamtuning_pso", + "KernelFile": "dummy.cu", + "GlobalSizeType": "CUDA", + "LocalSize": { + "X": "block_size_x", + "Y": "block_size_y", + "Z": "1" + }, + "GlobalSize": { + "X": "(262144 // block_size_x) // tile_size_x", + "Y": "(262144 // block_size_y) // tile_size_y", + "Z": "1" + }, + "GridDivX": [ + "block_size_x", + "tile_size_x" + ], + "GridDivY": [ + "block_size_y", + "tile_size_y" + ], + "ProblemSize": [ + 25000, + 2048, + 1 + ], + "SharedMemory": 0, + "Stream": null, + "Arguments": [] + } +} \ No newline at end of file diff --git a/cached_data_used/hyperparametertuning_metastrategies/metatuning_simulated_annealing.json b/cached_data_used/hyperparametertuning_metastrategies/metatuning_simulated_annealing.json new file mode 100644 index 0000000..2692ac9 --- /dev/null +++ b/cached_data_used/hyperparametertuning_metastrategies/metatuning_simulated_annealing.json @@ 
-0,0 +1,68 @@ +{ + "General": { + "BenchmarkName": "hyperparamtuning_dual_annealing", + "OutputFormat": "JSON" + }, + "ConfigurationSpace": { + "TuningParameters": [ + { + "Name": "T", + "Type": "int", + "Values": "[0.5, 1.0, 1.5]", + "Default": 1.0 + }, + { + "Name": "T_min", + "Type": "int", + "Values": "[0.0001, 0.001, 0.01]", + "Default": 0.001 + }, + { + "Name": "alpha", + "Type": "int", + "Values": "[0.9925, 0.995, 0.9975]", + "Default": 0.995 + }, + { + "Name": "maxiter", + "Type": "int", + "Values": "[1, 2, 3]", + "Default": 2 + } + ], + "Conditions": [] + }, + "KernelSpecification": { + "Language": "Hypertuner", + "BenchmarkName": "hyperparamtuning_simulated_annealing", + "KernelName": "hyperparamtuning_simulated_annealing", + "KernelFile": "dummy.cu", + "GlobalSizeType": "CUDA", + "LocalSize": { + "X": "block_size_x", + "Y": "block_size_y", + "Z": "1" + }, + "GlobalSize": { + "X": "(262144 // block_size_x) // tile_size_x", + "Y": "(262144 // block_size_y) // tile_size_y", + "Z": "1" + }, + "GridDivX": [ + "block_size_x", + "tile_size_x" + ], + "GridDivY": [ + "block_size_y", + "tile_size_y" + ], + "ProblemSize": [ + 25000, + 2048, + 1 + ], + "SharedMemory": 0, + "Stream": null, + "Arguments": [] + } +} \ No newline at end of file diff --git a/experiment_files/compare_hypertuners_metastrategy.json b/experiment_files/compare_hypertuners_metastrategy.json new file mode 100644 index 0000000..886f020 --- /dev/null +++ b/experiment_files/compare_hypertuners_metastrategy.json @@ -0,0 +1,87 @@ +{ + "version": "1.1.0", + "name": "Compare hyperparameter metastrategies", + "parent_folder": "./hyperparametertuning_metastrategies", + "experimental_groups_defaults": { + "applications": [ + { + "name": "dual_annealing", + "folder": "../autotuning_methodology/cached_data_used/hyperparametertuning_metastrategies", + "input_file": "metatuning_dual_annealing.json" + }, + { + "name": "pso", + "folder": 
"../autotuning_methodology/cached_data_used/hyperparametertuning_metastrategies", + "input_file": "metatuning_pso.json" + }, + { + "name": "simulated_annealing", + "folder": "../autotuning_methodology/cached_data_used/hyperparametertuning_metastrategies", + "input_file": "metatuning_simulated_annealing.json" + } + ], + "gpus": [ + "A4000" + ], + "pattern_for_full_search_space_filenames": { + "regex": "/Users/fjwillemsen/Downloads/new_0.95_10x50x/hyperparamtuning_paper_bruteforce_${applications}_T4_C.json" + }, + "stochastic": true, + "repeats": 50, + "samples": 32, + "minimum_number_of_valid_search_iterations": 2, + "ignore_cache": false, + "cutoff_margin": 0.5 + }, + "search_strategies": [ + { + "name": "meta_pso", + "search_method": "pso", + "display_name": "PSO", + "autotuner": "KernelTuner" + }, + { + "name": "meta_simulated_annealing", + "search_method": "simulated_annealing", + "display_name": "Simulated Annealing", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.9, + "cutoff_percentile_start": 0.5, + "cutoff_type": "fevals", + "objective_time_keys": [ + "compilation", + "framework", + "search_algorithm" + ], + "objective_performance_keys": [ + "score" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "aggregate", + "style": "line" + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file From a3f77fcc619cfbd15c8c64899ea9a8916348bcff Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Tue, 18 Mar 2025 16:55:03 +0100 Subject: [PATCH 121/234] Improved error reporting and code legibility --- src/autotuning_methodology/curves.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git 
a/src/autotuning_methodology/curves.py b/src/autotuning_methodology/curves.py index 0442f95..d191f7e 100644 --- a/src/autotuning_methodology/curves.py +++ b/src/autotuning_methodology/curves.py @@ -554,6 +554,8 @@ def _get_matching_feval_indices_in_range(self, fevals_range: np.ndarray) -> np.n def _get_curve_over_fevals_values_in_range(self, fevals_range: np.ndarray) -> tuple[np.ndarray, np.ndarray]: """Get the valid fevals and values that are in the given range.""" + if len(fevals_range) == 0: + raise ValueError("The fevals range must have at least one value") target_index: int = fevals_range[-1] - 1 # filter to only get data in the fevals range @@ -714,7 +716,9 @@ def _get_curve_over_time_values_in_range( real_stopping_point_time: float = np.nanmedian(highest_time_per_repeat) # filter to get the time range with a margin on both ends for the isotonic regression - time_range_margin_modifier = 0.25 * (num_repeats / times.size) # give more margin when there are few values relative to the number of repeats + time_range_margin_modifier = 0.25 * ( + num_repeats / times.size + ) # give more margin when there are few values relative to the number of repeats time_range_margin = 0.1 + time_range_margin_modifier time_range_start = time_range[0] * (1 - time_range_margin) time_range_end = time_range[-1] * (1 + time_range_margin) From 0eea8e49bd544253a4601564e7a7f8a1cc27390e Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 18 Mar 2025 19:07:51 +0100 Subject: [PATCH 122/234] Added diff_evo tuning to comparison --- .../compare_hypertuners_paper.json | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index dd456d9..d1dbf3b 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -42,6 +42,47 @@ "ignore_cache": false }, "search_strategies": [ + { + "name": "diff_evo_tuned", + "search_method": 
"diff_evo", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 20 + }, + { + "name": "maxiter", + "value": 100 + }, + { + "name": "method", + "value": "randtobest1bin" + } + ], + "display_name": "Differential Evolution tuned", + "autotuner": "KernelTuner" + }, + { + "name": "diff_evo_inv_tuned", + "search_method": "diff_evo", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 10 + }, + { + "name": "maxiter", + "value": 100 + }, + { + "name": "method", + "value": "best1exp" + } + ], + "display_name": "Differential Evolution untuned", + "autotuner": "KernelTuner", + "color_parent": "diff_evo_tuned" + }, { "name": "dual_annealing_tuned", "search_method": "dual_annealing", From 2b5371cbd55797abb03bb606a49779082f2660ca Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Wed, 19 Mar 2025 18:56:53 +0100 Subject: [PATCH 123/234] Created new function in searchsspace_statistics for getting both the start and stop budget with a safe margin --- src/autotuning_methodology/experiments.py | 12 +++-- .../report_experiments.py | 7 ++- .../searchspace_statistics.py | 44 ++++++++++++++++++- 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 18bdcc2..301b6e9 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -289,13 +289,11 @@ def calculate_budget(group: dict, statistics_settings: dict, searchspace_stats: Returns: A modified group dictionary. 
""" - # get cutoff point - _, cutoff_point_fevals, cutoff_point_time = searchspace_stats.cutoff_point_fevals_time( - statistics_settings["cutoff_percentile"] - ) - # get cutoff point start - _, _, cutoff_point_start_time = searchspace_stats.cutoff_point_fevals_time( - statistics_settings["cutoff_percentile_start"] + # get cutoff points + _, cutoff_point_fevals, cutoff_point_start_time, cutoff_point_time = ( + searchspace_stats.cutoff_point_fevals_time_start_end( + statistics_settings["cutoff_percentile_start"], statistics_settings["cutoff_percentile"] + ) ) # +10% margin, to make sure cutoff_point is reached by compensating for potential non-valid evaluations # noqa: E501 diff --git a/src/autotuning_methodology/report_experiments.py b/src/autotuning_methodology/report_experiments.py index 248624e..19e59a3 100644 --- a/src/autotuning_methodology/report_experiments.py +++ b/src/autotuning_methodology/report_experiments.py @@ -84,11 +84,10 @@ def get_aggregation_data( raise ValueError(f"Could not find '{use_strategy_as_baseline}' in executed strategies") # set the x-axis range - _, cutoff_point_fevals, cutoff_point_time = searchspace_stats.cutoff_point_fevals_time(cutoff_percentile) - _, cutoff_point_fevals_start, cutoff_point_time_start = searchspace_stats.cutoff_point_fevals_time( - cutoff_percentile_start + cutoff_point_fevals_start, cutoff_point_fevals, cutoff_point_time_start, cutoff_point_time = ( + searchspace_stats.cutoff_point_fevals_time_start_end(cutoff_percentile_start, cutoff_percentile) ) - fevals_range = np.arange(start=cutoff_point_fevals_start, stop=cutoff_point_fevals) + fevals_range = np.arange(start=cutoff_point_fevals_start, stop=cutoff_point_fevals + 1) time_range = np.linspace(start=cutoff_point_time_start, stop=cutoff_point_time, num=time_resolution) # get the random baseline diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index 08e9e3a..f0712e5 100644 --- 
a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -300,6 +300,17 @@ def cutoff_point(self, cutoff_percentile: float) -> tuple[float, int]: # exit(0) return objective_performance_at_cutoff_point, fevals_to_cutoff_point + def cutoff_point_time_from_fevals(self, cutoff_point_fevals: int) -> float: + """Calculates the time to the cutoff point from the number of function evaluations. + + Args: + cutoff_point_fevals: the number of function evaluations to reach the cutoff point. + + Returns: + The time to the cutoff point. + """ + return cutoff_point_fevals * self.total_time_median() + def cutoff_point_fevals_time(self, cutoff_percentile: float) -> tuple[float, int, float]: """Calculates the cutoff point. @@ -310,9 +321,40 @@ def cutoff_point_fevals_time(self, cutoff_percentile: float) -> tuple[float, int A tuple of the objective value at cutoff point, fevals to cutoff point, and the mean time to cutoff point. """ cutoff_point_value, cutoff_point_fevals = self.cutoff_point(cutoff_percentile) - cutoff_point_time = cutoff_point_fevals * self.total_time_median() + cutoff_point_time = self.cutoff_point_time_from_fevals(cutoff_point_fevals) return cutoff_point_value, cutoff_point_fevals, cutoff_point_time + def cutoff_point_fevals_time_start_end( + self, cutoff_percentile_start: float, cutoff_percentile: float + ) -> tuple[int, int, float, float]: + """Calculates the cutoff point for both the start and end, and ensures there is enough margin between the two. + + Args: + cutoff_percentile_start: the desired cutoff percentile to reach before starting the plot. + cutoff_percentile: the desired cutoff percentile to reach before stopping. + + Returns: + A tuple of the fevals to cutoff point start and end, and the mean time to cutoff point start and end. 
+ """ + # get the cutoff points + _, cutoff_point_fevals_start = self.cutoff_point(cutoff_percentile_start) + _, cutoff_point_fevals_end = self.cutoff_point(cutoff_percentile) + + # apply a safe margin if needed + if cutoff_point_fevals_end - cutoff_point_fevals_start == 0: + if cutoff_point_fevals_start == 0: + cutoff_point_fevals_end += 2 + else: + cutoff_point_fevals_end += 1 + cutoff_point_fevals_start -= 1 + + # get the times + cutoff_point_time_start = self.cutoff_point_time_from_fevals(cutoff_point_fevals_start) + cutoff_point_time_end = self.cutoff_point_time_from_fevals(cutoff_point_fevals_end) + + # return the values + return cutoff_point_fevals_start, cutoff_point_fevals_end, cutoff_point_time_start, cutoff_point_time_end + def get_valid_filepath(self) -> Path: """Returns the filepath to the Searchspace statistics .json file if it exists. From 057a3e7a1c10d3236e8e8184f55a88ea6c29a11a Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 20 Mar 2025 11:33:39 +0100 Subject: [PATCH 124/234] Added new heatmap experiment files, updated main experiment file --- .../compare_hypertuners_paper.json | 53 +----- ...mpare_hypertuners_paper_heatmaps_left.json | 159 ++++++++++++++++++ ...pare_hypertuners_paper_heatmaps_right.json | 159 ++++++++++++++++++ 3 files changed, 319 insertions(+), 52 deletions(-) create mode 100644 experiment_files/compare_hypertuners_paper_heatmaps_left.json create mode 100644 experiment_files/compare_hypertuners_paper_heatmaps_right.json diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index d1dbf3b..56a8db6 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -205,55 +205,6 @@ "display_name": "PSO untuned", "autotuner": "KernelTuner", "color_parent": "pso_tuned" - }, - { - "name": "simulated_annealing_tuned", - "search_method": "simulated_annealing", - "search_method_hyperparameters": [ - { - "name": "T", - 
"value": 0.5 - }, - { - "name": "T_min", - "value": 0.0001 - }, - { - "name": "alpha", - "value": 0.9975 - }, - { - "name": "maxiter", - "value": 2 - } - ], - "display_name": "Simulated Annealing tuned", - "autotuner": "KernelTuner" - }, - { - "name": "simulated_annealing_inv_tuned", - "search_method": "simulated_annealing", - "search_method_hyperparameters": [ - { - "name": "T", - "value": 1.0 - }, - { - "name": "T_min", - "value": 0.0001 - }, - { - "name": "alpha", - "value": 0.9975 - }, - { - "name": "maxiter", - "value": 2 - } - ], - "display_name": "Simulated Annealing untuned", - "autotuner": "KernelTuner", - "color_parent": "simulated_annealing_tuned" } ], "statistics_settings": { @@ -301,9 +252,7 @@ "y_axis_value_types": [ "gpus" ], - "cmin": -2.5, - "include_y_labels": true, - "include_colorbar": true + "cmin": -2.5 }, { "scope": "search_strategy", diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_left.json b/experiment_files/compare_hypertuners_paper_heatmaps_left.json new file mode 100644 index 0000000..c32ed6c --- /dev/null +++ b/experiment_files/compare_hypertuners_paper_heatmaps_left.json @@ -0,0 +1,159 @@ +{ + "version": "1.1.0", + "name": "Compare hyperparameter tuning", + "parent_folder": "./hyperparametertuning_milo", + "experimental_groups_defaults": { + "applications": [ + { + "name": "dedispersion_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "dedispersion_milo.json" + }, + { + "name": "hotspot_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "hotspot_milo.json" + }, + { + "name": "convolution_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "convolution_milo.json" + }, + { + "name": "gemm_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "gemm_milo.json" + } + ], + "gpus": [ + "W6600", + "A6000", + "A100", + "A4000", + "MI250X" + ], + "pattern_for_full_search_space_filenames": { + 
"regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 50, + "samples": 32, + "minimum_number_of_valid_search_iterations": 4, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "diff_evo_inv_tuned", + "search_method": "diff_evo", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 10 + }, + { + "name": "maxiter", + "value": 100 + }, + { + "name": "method", + "value": "best1exp" + } + ], + "display_name": "Differential Evolution untuned", + "autotuner": "KernelTuner" + }, + { + "name": "dual_annealing_inv_tuned", + "search_method": "dual_annealing", + "search_method_hyperparameters": [ + { + "name": "method", + "value": "Nelder-Mead" + } + ], + "display_name": "Dual Annealing untuned", + "autotuner": "KernelTuner" + }, + { + "name": "genetic_algorithm_inv_tuned", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 10 + }, + { + "name": "maxiter", + "value": 50 + }, + { + "name": "method", + "value": "two_point" + }, + { + "name": "mutation_chance", + "value": 20 + } + ], + "display_name": "Genetic Algorithm untuned", + "autotuner": "KernelTuner" + }, + { + "name": "pso_inv_tuned", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 10 + }, + { + "name": "maxiter", + "value": 150 + }, + { + "name": "c1", + "value": 1.0 + }, + { + "name": "c2", + "value": 1.5 + } + ], + "display_name": "PSO untuned", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.95, + "cutoff_percentile_start": 0.5, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "time" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ], + "cmin": -2.5, + 
"include_y_labels": true, + "include_colorbar": false + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_right.json b/experiment_files/compare_hypertuners_paper_heatmaps_right.json new file mode 100644 index 0000000..17aa2d3 --- /dev/null +++ b/experiment_files/compare_hypertuners_paper_heatmaps_right.json @@ -0,0 +1,159 @@ +{ + "version": "1.1.0", + "name": "Compare hyperparameter tuning", + "parent_folder": "./hyperparametertuning_milo", + "experimental_groups_defaults": { + "applications": [ + { + "name": "dedispersion_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "dedispersion_milo.json" + }, + { + "name": "hotspot_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "hotspot_milo.json" + }, + { + "name": "convolution_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "convolution_milo.json" + }, + { + "name": "gemm_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "gemm_milo.json" + } + ], + "gpus": [ + "W6600", + "A6000", + "A100", + "A4000", + "MI250X" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 50, + "samples": 32, + "minimum_number_of_valid_search_iterations": 4, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "diff_evo_tuned", + "search_method": "diff_evo", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 20 + }, + { + "name": "maxiter", + "value": 100 + }, + { + "name": "method", + "value": "randtobest1bin" + } + ], + "display_name": "Differential Evolution tuned", + "autotuner": "KernelTuner" + }, + { + "name": "dual_annealing_tuned", + "search_method": "dual_annealing", + 
"search_method_hyperparameters": [ + { + "name": "method", + "value": "COBYLA" + } + ], + "display_name": "Dual Annealing tuned", + "autotuner": "KernelTuner" + }, + { + "name": "genetic_algorithm_tuned", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 30 + }, + { + "name": "maxiter", + "value": 50 + }, + { + "name": "method", + "value": "uniform" + }, + { + "name": "mutation_chance", + "value": 20 + } + ], + "display_name": "Genetic Algorithm tuned", + "autotuner": "KernelTuner" + }, + { + "name": "pso_tuned", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 20 + }, + { + "name": "maxiter", + "value": 150 + }, + { + "name": "c1", + "value": 3.0 + }, + { + "name": "c2", + "value": 1.5 + } + ], + "display_name": "PSO tuned", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "minimization": true, + "cutoff_percentile": 0.95, + "cutoff_percentile_start": 0.5, + "cutoff_type": "fevals", + "objective_time_keys": [ + "all" + ], + "objective_performance_keys": [ + "time" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ], + "cmin": -2.5, + "include_y_labels": false, + "include_colorbar": true + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file From 733f15b03499dd8b1d5ef49db139ea509b5b480a Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 20 Mar 2025 11:39:50 +0100 Subject: [PATCH 125/234] Improved heatmap plotting to support left / right comparison plots --- .../visualize_experiments.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 
76ba03a..f528f73 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -372,7 +372,7 @@ def __init__( cmin = plot.get("cmin", -10.0) # colorbar lower limit cmax = plot.get("cmax", 1.0) # colorbar upper limit cnum = plot.get("cnum", 5) # number of ticks on the colorbar - include_y_labels = plot.get("include_y_labels", True) + include_y_labels = plot.get("include_y_labels", None) include_colorbar = plot.get("include_colorbar", True) if vmin != -10.0: warnings.warn( @@ -506,14 +506,17 @@ def norm_color_val(v): } x_ticks = label_data[x_type][0] y_ticks = label_data[y_type][0] + figsize = None if (x_type == "time" and y_type == "searchspaces") or (x_type == "searchspaces" and y_type == "time"): plot_data: np.ndarray = np.stack(np.array([t[3] for t in strategy_data])) if x_type == "searchspaces": plot_data = plot_data.transpose() + figsize = (9, 5) elif (x_type == "gpus" and y_type == "applications") or (y_type == "gpus" and x_type == "applications"): plot_data = np.reshape(plot_data, (len(label_data["gpus"][0]), len(label_data["applications"][0]))) if x_type == "gpus": plot_data = np.transpose(plot_data) + figsize = (5, 3.5) else: raise NotImplementedError( f"Heatmap has not yet been implemented for {x_type}, {y_type}. Submit an issue to request it." @@ -527,7 +530,7 @@ def norm_color_val(v): # set up the plot fig, axs = plt.subplots( - ncols=1, figsize=(7, 4), dpi=300 + ncols=1, figsize=figsize, dpi=300 ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. 
if not hasattr(axs, "__len__"): axs = [axs] @@ -539,16 +542,19 @@ def norm_color_val(v): # plot the heatmap axs[0].set_xlabel(label_data[x_type][1]) axs[0].set_xticks(ticks=np.arange(len(x_ticks)), labels=x_ticks, rotation=0) - if include_y_labels: + if include_y_labels is True or None: axs[0].set_ylabel(label_data[y_type][1]) axs[0].set_yticks(ticks=np.arange(len(y_ticks)), labels=y_ticks) - else: + if include_y_labels is True: + # axs[0].yaxis.set_label_position("right") + axs[0].yaxis.tick_right() + elif include_y_labels is False: axs[0].set_yticks(ticks=np.arange(len(y_ticks))) axs[0].tick_params(labelleft=False) hm = axs[0].imshow(plot_data, vmin=vmin, vmax=vmax, cmap=cmap, interpolation="nearest", aspect="auto") # plot the colorbar - if not include_colorbar: + if include_colorbar is True: cbar = fig.colorbar(hm) if cmin != vmin or cmax != vmax: cbar.set_ticks(np.linspace(cmin, cmax, num=cnum)) # set colorbar limits From 7393a1450e24b22799ec49244080eed6c9cf5aff Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 20 Mar 2025 14:57:37 +0100 Subject: [PATCH 126/234] Raised an error when optimum is in baseline --- src/autotuning_methodology/visualize_experiments.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 8e29b7d..e2646ef 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -919,13 +919,12 @@ def plot_strategies( confidence_level: float = plot_settings.get("confidence_level", 0.95) absolute_optimum = searchspace_stats.total_performance_absolute_optimum() median = searchspace_stats.total_performance_median() - optimum_median_difference = absolute_optimum - median def normalize(curve): """Min-max normalization with median as min and absolute optimum as max.""" if curve is None: return None - return (curve - median) / 
optimum_median_difference + return (curve - median) / (absolute_optimum - median) def normalize_multiple(curves: list) -> tuple: """Normalize multiple curves at once.""" @@ -944,6 +943,13 @@ def normalize_multiple(curves: list) -> tuple: ax.axhline(0, label="baseline trajectory", color="black", ls="--") elif y_type == "normalized" or y_type == "baseline" or y_type == "absolute": baseline = baseline_curve.get_curve(x_axis_range, x_type) + if absolute_optimum in baseline: + raise ValueError( + f"The optimum {absolute_optimum} is in the baseline, this will cause zero division problems" + ) + # cut_at_index = np.argmax(baseline == absolute_optimum) + # baseline = baseline[:cut_at_index] + # x_axis_range = x_axis_range[:cut_at_index] if y_type == "normalized": baseline = normalize(baseline) ax.plot(x_axis_range, baseline, label="Calculated baseline", color="black", ls="--") From 0af8700018ed8542ec51b9058cd18977c0869090 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 20 Mar 2025 14:58:07 +0100 Subject: [PATCH 127/234] Set the colorbar limit defaults to color range limits --- src/autotuning_methodology/visualize_experiments.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index e2646ef..97ea728 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -369,8 +369,8 @@ def __init__( bins = plot.get("bins", 10) vmin = plot.get("vmin", -10.0) # color range lower limit vmax = plot.get("vmax", 1.0) # color range upper limit - cmin = plot.get("cmin", -10.0) # colorbar lower limit - cmax = plot.get("cmax", 1.0) # colorbar upper limit + cmin = plot.get("cmin", vmin) # colorbar lower limit + cmax = plot.get("cmax", vmax) # colorbar upper limit cnum = plot.get("cnum", 5) # number of ticks on the colorbar if vmin != -10.0: warnings.warn( From 
48465bcb665ca6e71d0af8ba6216e03ffe1dc951 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 20 Mar 2025 14:58:43 +0100 Subject: [PATCH 128/234] Raise an error on zero division --- src/autotuning_methodology/baseline.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/autotuning_methodology/baseline.py b/src/autotuning_methodology/baseline.py index 232ca6f..051a328 100644 --- a/src/autotuning_methodology/baseline.py +++ b/src/autotuning_methodology/baseline.py @@ -39,7 +39,14 @@ def get_standardised_curves(self, range: np.ndarray, strategy_curves: list[np.nd standardised_curves.append(None) continue assert strategy_curve.shape == random_curve.shape, "strategy_curve shape must match random_curve shape" - standardised_curve = (strategy_curve - random_curve) / (absolute_optimum - random_curve) + division = absolute_optimum - random_curve + + # check for division by zero + if 0.0 in division: + raise ValueError(f"Division by zero encountered, first at index {np.argmax(division == 0.0)}.") + + # calculate the standardised curve + standardised_curve = (strategy_curve - random_curve) / division standardised_curves.append(standardised_curve) return tuple(standardised_curves) From da4eda261d4f7083bd57a3f8d48683c8e4477e9f Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 20 Mar 2025 14:59:54 +0100 Subject: [PATCH 129/234] Adjusted path in experiments defaults to make tests work --- src/autotuning_methodology/experiments_defaults.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/autotuning_methodology/experiments_defaults.json b/src/autotuning_methodology/experiments_defaults.json index c4f0ac2..9d56290 100644 --- a/src/autotuning_methodology/experiments_defaults.json +++ b/src/autotuning_methodology/experiments_defaults.json @@ -20,7 +20,7 @@ "RTX_2080_Ti" ], "pattern_for_full_search_space_filenames": { - "regex": "../../autotuning_methodology/benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" + 
"regex": "../autotuning_methodology/benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" }, "stochastic": true, "repeats": 25, From 539bbcffba4b2d2363151a4e1f4a67506768d28e Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 20 Mar 2025 15:01:45 +0100 Subject: [PATCH 130/234] Auto-downscaling of cutoff points that equal 1.0, automatic application of safe margin to start and end fevals cutoff. --- .../searchspace_statistics.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index f0712e5..675441a 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -5,6 +5,7 @@ import json from math import ceil, floor from pathlib import Path +from warnings import warn import matplotlib.pyplot as plt import numpy as np @@ -290,6 +291,15 @@ def cutoff_point(self, cutoff_percentile: float) -> tuple[float, int]: i = next( x[0] for x in enumerate(inverted_sorted_performance_arr) if x[1] <= objective_performance_at_cutoff_point ) + if cutoff_percentile != 1.0 and inverted_sorted_performance_arr[i] == self.total_performance_absolute_optimum(): + if i == 0: + raise ValueError( + f"The optimum is directly reached ({inverted_sorted_performance_arr[i]})", + inverted_sorted_performance_arr, + ) + else: + i = i - 1 + warn(f"Scaled down cutoff point as {cutoff_percentile} is equal to optimum (1.0) for this distribution") # In case of x <= (1+p) * f_opt # i = next(x[0] for x in enumerate(inverted_sorted_performance_arr) if x[1] <= (1 + (1 - cutoff_percentile)) * arr[-1]) # noqa: E501 # In case of p*x <= f_opt @@ -341,12 +351,14 @@ def cutoff_point_fevals_time_start_end( _, cutoff_point_fevals_end = self.cutoff_point(cutoff_percentile) # apply a safe margin if needed - if cutoff_point_fevals_end - cutoff_point_fevals_start == 0: + if cutoff_point_fevals_end - 
cutoff_point_fevals_start < 2: if cutoff_point_fevals_start == 0: - cutoff_point_fevals_end += 2 + cutoff_point_fevals_end = min(self.cutoff_point(1.0)[0], cutoff_point_fevals_end + 2) else: - cutoff_point_fevals_end += 1 + cutoff_point_fevals_end = min(self.cutoff_point(1.0)[0], cutoff_point_fevals_end + 1) cutoff_point_fevals_start -= 1 + if cutoff_point_fevals_end - cutoff_point_fevals_start == 0: + raise ValueError("Cutoff point start and end are the same") # get the times cutoff_point_time_start = self.cutoff_point_time_from_fevals(cutoff_point_fevals_start) From b1532412402e781254bf248c470df2ea84b416a0 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 20 Mar 2025 15:02:24 +0100 Subject: [PATCH 131/234] Added diff_evo to metastrategy for hyperparamtuning comparison --- .../metatuning_diff_evo.json | 62 +++++++++++++++++++ .../compare_hypertuners_metastrategy.json | 34 ++++++++-- 2 files changed, 90 insertions(+), 6 deletions(-) create mode 100644 cached_data_used/hyperparametertuning_metastrategies/metatuning_diff_evo.json diff --git a/cached_data_used/hyperparametertuning_metastrategies/metatuning_diff_evo.json b/cached_data_used/hyperparametertuning_metastrategies/metatuning_diff_evo.json new file mode 100644 index 0000000..4a00666 --- /dev/null +++ b/cached_data_used/hyperparametertuning_metastrategies/metatuning_diff_evo.json @@ -0,0 +1,62 @@ +{ + "General": { + "BenchmarkName": "hyperparamtuning_diff_evo", + "OutputFormat": "JSON" + }, + "ConfigurationSpace": { + "TuningParameters": [ + { + "Name": "method", + "Type": "string", + "Values": "['best1bin', 'best1exp', 'rand1exp', 'randtobest1exp', 'best2exp', 'rand2exp', 'randtobest1bin', 'best2bin', 'rand2bin', 'rand1bin']", + "Default": "randtobest1bin" + }, + { + "Name": "popsize", + "Type": "int", + "Values": "[10, 20, 30]", + "Default": 20 + }, + { + "Name": "maxiter", + "Type": "int", + "Values": "[50, 100, 150]", + "Default": 100 + } + ], + "Conditions": [] + }, + "KernelSpecification": { 
+ "Language": "Hypertuner", + "BenchmarkName": "hyperparamtuning_diff_evo", + "KernelName": "hyperparamtuning_diff_evo", + "KernelFile": "dummy.cu", + "GlobalSizeType": "CUDA", + "LocalSize": { + "X": "block_size_x", + "Y": "block_size_y", + "Z": "1" + }, + "GlobalSize": { + "X": "(262144 // block_size_x) // tile_size_x", + "Y": "(262144 // block_size_y) // tile_size_y", + "Z": "1" + }, + "GridDivX": [ + "block_size_x", + "tile_size_x" + ], + "GridDivY": [ + "block_size_y", + "tile_size_y" + ], + "ProblemSize": [ + 25000, + 2048, + 1 + ], + "SharedMemory": 0, + "Stream": null, + "Arguments": [] + } +} \ No newline at end of file diff --git a/experiment_files/compare_hypertuners_metastrategy.json b/experiment_files/compare_hypertuners_metastrategy.json index 886f020..aa0395c 100644 --- a/experiment_files/compare_hypertuners_metastrategy.json +++ b/experiment_files/compare_hypertuners_metastrategy.json @@ -5,15 +5,20 @@ "experimental_groups_defaults": { "applications": [ { - "name": "dual_annealing", + "name": "diff_evo", "folder": "../autotuning_methodology/cached_data_used/hyperparametertuning_metastrategies", - "input_file": "metatuning_dual_annealing.json" + "input_file": "metatuning_diff_evo.json" }, { "name": "pso", "folder": "../autotuning_methodology/cached_data_used/hyperparametertuning_metastrategies", "input_file": "metatuning_pso.json" }, + { + "name": "genetic_algorithm", + "folder": "../autotuning_methodology/cached_data_used/hyperparametertuning_metastrategies", + "input_file": "metatuning_genetic_algorithm.json" + }, { "name": "simulated_annealing", "folder": "../autotuning_methodology/cached_data_used/hyperparametertuning_metastrategies", @@ -24,22 +29,39 @@ "A4000" ], "pattern_for_full_search_space_filenames": { - "regex": "/Users/fjwillemsen/Downloads/new_0.95_10x50x/hyperparamtuning_paper_bruteforce_${applications}_T4_C.json" + "regex": "/Users/fjwillemsen/Downloads/new_0.95_10x50x/hyperparamtuning_paper_bruteforce_${applications}_C_T4.json" }, 
"stochastic": true, "repeats": 50, "samples": 32, "minimum_number_of_valid_search_iterations": 2, - "ignore_cache": false, - "cutoff_margin": 0.5 + "ignore_cache": false }, "search_strategies": [ + { + "name": "meta_diff_evo", + "search_method": "diff_evo", + "display_name": "Differential Evolution", + "autotuner": "KernelTuner" + }, + { + "name": "meta_dual_annealing", + "search_method": "dual_annealing", + "display_name": "Dual Annealing", + "autotuner": "KernelTuner" + }, { "name": "meta_pso", "search_method": "pso", "display_name": "PSO", "autotuner": "KernelTuner" }, + { + "name": "meta_genetic_algorithm", + "search_method": "genetic_algorithm", + "display_name": "Genetic Algorithm", + "autotuner": "KernelTuner" + }, { "name": "meta_simulated_annealing", "search_method": "simulated_annealing", @@ -49,7 +71,7 @@ ], "statistics_settings": { "minimization": true, - "cutoff_percentile": 0.9, + "cutoff_percentile": 0.99, "cutoff_percentile_start": 0.5, "cutoff_type": "fevals", "objective_time_keys": [ From 00bd081294f875de9aeac56570de4e01e9962c62 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 20 Mar 2025 15:02:34 +0100 Subject: [PATCH 132/234] Updated benchmark hub --- benchmark_hub | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark_hub b/benchmark_hub index 13bdcc9..b3f0d1a 160000 --- a/benchmark_hub +++ b/benchmark_hub @@ -1 +1 @@ -Subproject commit 13bdcc9660586f2ecb2e6c9f63909a5138d614f3 +Subproject commit b3f0d1a32408bfe26df77bb914fb216616342a33 From 58e0c3573d57a05a55150c885efd80d98873968c Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 20 Mar 2025 15:23:16 +0100 Subject: [PATCH 133/234] Issue a warning if heatmap data is outside the visible colorbar range --- .../visualize_experiments.py | 57 ++++++++++++++----- .../integration/mockfiles/test.json | 4 +- 2 files changed, 44 insertions(+), 17 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py 
b/src/autotuning_methodology/visualize_experiments.py index 31e944c..eee9323 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -488,13 +488,18 @@ def norm_color_val(v): "GPUs", ), "applications": ( - list(dict.fromkeys([t[1].replace(remove_from_applications_label, "") for t in strategy_data])), + list( + dict.fromkeys([t[1].replace(remove_from_applications_label, "") for t in strategy_data]) + ), "Applications", ), "searchspaces": ( list( dict.fromkeys( - [f"{t[1]} on\n{t[0]}".replace(remove_from_searchspace_label, "") for t in strategy_data] + [ + f"{t[1]} on\n{t[0]}".replace(remove_from_searchspace_label, "") + for t in strategy_data + ] ) ), "Searchspaces", @@ -507,13 +512,19 @@ def norm_color_val(v): x_ticks = label_data[x_type][0] y_ticks = label_data[y_type][0] figsize = None - if (x_type == "time" and y_type == "searchspaces") or (x_type == "searchspaces" and y_type == "time"): + if (x_type == "time" and y_type == "searchspaces") or ( + x_type == "searchspaces" and y_type == "time" + ): plot_data: np.ndarray = np.stack(np.array([t[3] for t in strategy_data])) if x_type == "searchspaces": plot_data = plot_data.transpose() figsize = (9, 5) - elif (x_type == "gpus" and y_type == "applications") or (y_type == "gpus" and x_type == "applications"): - plot_data = np.reshape(plot_data, (len(label_data["gpus"][0]), len(label_data["applications"][0]))) + elif (x_type == "gpus" and y_type == "applications") or ( + y_type == "gpus" and x_type == "applications" + ): + plot_data = np.reshape( + plot_data, (len(label_data["gpus"][0]), len(label_data["applications"][0])) + ) if x_type == "gpus": plot_data = np.transpose(plot_data) figsize = (5, 3.5) @@ -522,11 +533,16 @@ def norm_color_val(v): f"Heatmap has not yet been implemented for {x_type}, {y_type}. Submit an issue to request it." 
) - # validate the data + # validate the data is within the vmin-vmax range and visible colorbar range outside_range = np.where(np.logical_or(plot_data < vmin, plot_data > vmax)) assert ( len(outside_range[0]) == 0 and len(outside_range[1]) == 0 ), f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range})" + outside_visible_range = np.where(np.logical_or(plot_data < cmin, plot_data > cmax)) + if not (len(outside_visible_range[0]) == 0 and len(outside_visible_range[1]) == 0): + warnings.warn( + f"There are values outside of the visible colorbar range ({cmin}, {cmax}): {plot_data[outside_visible_range]} ({outside_visible_range})" + ) # set up the plot fig, axs = plt.subplots( @@ -551,7 +567,9 @@ def norm_color_val(v): elif include_y_labels is False: axs[0].set_yticks(ticks=np.arange(len(y_ticks))) axs[0].tick_params(labelleft=False) - hm = axs[0].imshow(plot_data, vmin=vmin, vmax=vmax, cmap=cmap, interpolation="nearest", aspect="auto") + hm = axs[0].imshow( + plot_data, vmin=vmin, vmax=vmax, cmap=cmap, interpolation="nearest", aspect="auto" + ) # plot the colorbar if include_colorbar is True: @@ -602,8 +620,8 @@ def norm_color_val(v): # title = f"Performance of {strategy_displayname} over {'+'.join(plot_x_value_types)},{'+'.join(plot_y_value_types)}" # fig.canvas.manager.set_window_title(title) # if not save_figs: - # fig.suptitle(title) - + # fig.suptitle(title) + for comparison in comparisons: strategy_names = comparisons["strategies"] strategies = [s for s in self.strategies if s["name"]] @@ -623,13 +641,18 @@ def norm_color_val(v): "GPUs", ), "applications": ( - list(dict.fromkeys([t[1].replace(remove_from_applications_label, "") for t in strategy_data])), + list( + dict.fromkeys([t[1].replace(remove_from_applications_label, "") for t in strategy_data]) + ), "Applications", ), "searchspaces": ( list( dict.fromkeys( - [f"{t[1]} on\n{t[0]}".replace(remove_from_searchspace_label, "") for t in strategy_data] + [ + f"{t[1]} 
on\n{t[0]}".replace(remove_from_searchspace_label, "") + for t in strategy_data + ] ) ), "Searchspaces", @@ -641,12 +664,18 @@ def norm_color_val(v): } x_ticks = label_data[x_type][0] y_ticks = label_data[y_type][0] - if (x_type == "time" and y_type == "searchspaces") or (x_type == "searchspaces" and y_type == "time"): + if (x_type == "time" and y_type == "searchspaces") or ( + x_type == "searchspaces" and y_type == "time" + ): plot_data: np.ndarray = np.stack(np.array([t[3] for t in strategy_data])) if x_type == "searchspaces": plot_data = plot_data.transpose() - elif (x_type == "gpus" and y_type == "applications") or (y_type == "gpus" and x_type == "applications"): - plot_data = np.reshape(plot_data, (len(label_data["gpus"][0]), len(label_data["applications"][0]))) + elif (x_type == "gpus" and y_type == "applications") or ( + y_type == "gpus" and x_type == "applications" + ): + plot_data = np.reshape( + plot_data, (len(label_data["gpus"][0]), len(label_data["applications"][0])) + ) if x_type == "gpus": plot_data = np.transpose(plot_data) else: diff --git a/tests/autotuning_methodology/integration/mockfiles/test.json b/tests/autotuning_methodology/integration/mockfiles/test.json index af03c39..8bc4e35 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test.json +++ b/tests/autotuning_methodology/integration/mockfiles/test.json @@ -75,8 +75,7 @@ "y_axis_value_types": [ "gpus" ], - "vmin": -2.5, - "vmax": 1.2 + "cmin": -2.5 }, { "scope": "search_strategy", @@ -87,7 +86,6 @@ "y_axis_value_types": [ "searchspaces" ], - "vmin": -2.5, "bins": 10 }, { From 62aadb0170489b558c31e2bafd4a46e02c3a0e7a Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 20 Mar 2025 18:55:35 +0100 Subject: [PATCH 134/234] Added numpy <2 restriction because of Kernel Tuner --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7c33c4d..c060e48 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ 
classifiers = [ "Programming Language :: Python :: 3.12", ] # https://pypi.org/classifiers/ dependencies = [ - "numpy >= 1.22.4", + "numpy (>=1.22.4,<2.0.0)", "scipy >= 1.10.1", "scikit-learn >= 1.0.2", "matplotlib >= 3.7.1", From fd32d31a86d65da897210ffaf1226525b268594d Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 20 Mar 2025 19:06:46 +0100 Subject: [PATCH 135/234] Implemented 'minimum_fraction_of_budget_valid' in experiments standard --- experiment_files/convert_old_to_new_format.py | 3 +- .../experiments_defaults.json | 4 +- src/autotuning_methodology/runner.py | 24 +++++++++++- .../schemas/experiments.json | 37 +++++++++++++++++-- .../integration/mockfiles/test.json | 3 +- 5 files changed, 62 insertions(+), 9 deletions(-) diff --git a/experiment_files/convert_old_to_new_format.py b/experiment_files/convert_old_to_new_format.py index c054dac..f238ada 100644 --- a/experiment_files/convert_old_to_new_format.py +++ b/experiment_files/convert_old_to_new_format.py @@ -18,7 +18,7 @@ # convert the dictionary to the new format new_experiment = { - "version": "1.1.0", + "version": "1.2.0", "name": old_experiment["name"], "parent_folder": f"./{old_experiment['folder_id']}", "experimental_groups_defaults": { @@ -37,6 +37,7 @@ "stochastic": old_experiment["strategy_defaults"]["stochastic"], "repeats": old_experiment["strategy_defaults"]["repeats"], "samples": old_experiment["strategy_defaults"]["iterations"], + "minimum_fraction_of_budget_valid": old_experiment.get("minimum_fraction_of_budget_valid", 0.5), "minimum_number_of_valid_search_iterations": old_experiment["strategy_defaults"][ "minimum_number_of_evaluations" ], diff --git a/src/autotuning_methodology/experiments_defaults.json b/src/autotuning_methodology/experiments_defaults.json index 9d56290..44555f8 100644 --- a/src/autotuning_methodology/experiments_defaults.json +++ b/src/autotuning_methodology/experiments_defaults.json @@ -1,5 +1,5 @@ { - "version": "1.1.0", + "version": "1.2.0", "name": "", 
"parent_folder": ".", "experimental_groups_defaults": { @@ -25,7 +25,7 @@ "stochastic": true, "repeats": 25, "samples": 32, - "minimum_number_of_valid_search_iterations": 10, + "minimum_fraction_of_budget_valid": 0.5, "ignore_cache": false }, "search_strategies": [ diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 651f27f..f600fce 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -217,13 +217,33 @@ def collect_results( input_file: an input json file to tune. group: a dictionary with settings for experimental group. results_description: the ``ResultsDescription`` object to write the results to. - searchspace_stats: the ``SearchspaceStatistics`` object, only used for conversion of imported runs. + searchspace_stats: the ``SearchspaceStatistics`` object, used for conversion of imported runs. profiling: whether profiling statistics must be collected. Returns: The ``ResultsDescription`` object with the results. 
""" - min_num_evals: int = group["minimum_number_of_valid_search_iterations"] + + # calculate the minimum number of function evaluations that must be valid + minimum_fraction_of_budget_valid = group.get("minimum_fraction_of_budget_valid", None) + if minimum_fraction_of_budget_valid is not None: + assert isinstance(minimum_fraction_of_budget_valid, float) + assert 0.0 < minimum_fraction_of_budget_valid <= 1.0 + max_fevals = None + budget = group['budget'] + if "max_fevals" in budget: + max_fevals = budget['max_fevals'] + elif "time_limit" in budget: + time_limit = budget['time_limit'] + time_per_feval = self.searchspace_stats.get_time_per_feval("mean_per_feval") + max_fevals = max(round(time_limit / time_per_feval), 2) + else: + raise ValueError(f"Unkown budget {budget}, can not calculate minimum fraction of budget valid") + min_num_evals = max(round(minimum_fraction_of_budget_valid * min(max_fevals, searchspace_stats.size)), 2) + if "minimum_number_of_valid_search_iterations" in group: + warnings.warn(f"Both 'minimum_number_of_valid_search_iterations' ({group['minimum_number_of_valid_search_iterations']}) and 'minimum_fraction_of_budget_valid' ({minimum_fraction_of_budget_valid}, {min_num_evals}) are set, the latter takes precedence.") + else: + min_num_evals: int = group["minimum_number_of_valid_search_iterations"] if len(results_description.objective_performance_keys) != 1: raise NotImplementedError( diff --git a/src/autotuning_methodology/schemas/experiments.json b/src/autotuning_methodology/schemas/experiments.json index e8b24fd..7a10354 100755 --- a/src/autotuning_methodology/schemas/experiments.json +++ b/src/autotuning_methodology/schemas/experiments.json @@ -1,7 +1,7 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://example.com/schemas/experiments/v0.1.1.schema.json", - "version": "1.1.0", + "version": "1.2.0", "title": "Experiment", "description": "An experiment setup configuration file", "type": "object", @@ -116,6 +116,13 @@ 
"minimum": 1, "default": 32 }, + "minimum_fraction_of_budget_valid": { + "description": "The fraction of the total searchspace size that must be non-error, valid configurations for each single run of search algorithm", + "type": "number", + "exclusiveMinimum": 0.0, + "maximum": 1.0, + "default": 0.5 + }, "minimum_number_of_valid_search_iterations": { "description": "How many non-error, valid configurations account for a single run of search algorithm", "type": "integer", @@ -319,15 +326,39 @@ ] }, "vmin": { - "description": "The minimum value on the color scale.", + "description": "The minimum value on the heatmap color scale.", "type": "number", "default": -0.5 }, "vmax": { - "description": "The maximum value on the color scale.", + "description": "The maximum value on the heatmap color scale.", + "type": "number", + "default": 1.0 + }, + "cmin": { + "description": "The minimum value shown in the heatmap colorbar. Must be vmin <= cmin < cmax <= vmax.", + "type": "number", + "default": -0.5 + }, + "cmax": { + "description": "The maximum value shown in the heatmap colorbar. Must be vmin <= cmin < cmax <= vmax", "type": "number", "default": 1.0 }, + "cnum": { + "description": "The number of evenly placed ticks on the heatmap colorbar to display.", + "type": "integer", + "minimum": 2, + "default": 5 + }, + "include_y_labels": { + "description": "Whether to show the y-axis labels on the heatmap. Displayed if not set.", + "type": "boolean" + }, + "include_colorbar": { + "description": "Whether to show the heatmap colorbar. Displayed if not set.", + "type": "boolean" + }, "bins": { "description": "The number of bins to use when plotting over a heatmap over a non-discrete range (e.g. 
time)", "type": "integer", diff --git a/tests/autotuning_methodology/integration/mockfiles/test.json b/tests/autotuning_methodology/integration/mockfiles/test.json index 8bc4e35..4a46737 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test.json +++ b/tests/autotuning_methodology/integration/mockfiles/test.json @@ -1,5 +1,5 @@ { - "version": "1.1.0", + "version": "1.2.0", "name": "Mock run test", "parent_folder": "./test_run_experiment", "experimental_groups_defaults": { @@ -19,6 +19,7 @@ "stochastic": true, "repeats": 10, "samples": 3, + "minimum_fraction_of_budget_valid": 0.5, "minimum_number_of_valid_search_iterations": 20, "ignore_cache": false }, From 9021c6e992520e58dba50e920561f18681aee51a Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 20 Mar 2025 19:12:43 +0100 Subject: [PATCH 136/234] Converted experiment files to updated format --- experiment_files/compare_hypertuners_paper.json | 4 ++-- experiment_files/compare_hypertuners_paper_heatmaps_left.json | 4 ++-- .../compare_hypertuners_paper_heatmaps_right.json | 4 ++-- experiment_files/compare_meta_algorithms.json | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 56a8db6..40eca9d 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -1,5 +1,5 @@ { - "version": "1.1.0", + "version": "1.2.0", "name": "Compare hyperparameter tuning", "parent_folder": "./hyperparametertuning_milo", "experimental_groups_defaults": { @@ -38,7 +38,7 @@ "stochastic": true, "repeats": 50, "samples": 32, - "minimum_number_of_valid_search_iterations": 4, + "minimum_fraction_of_budget_valid": 0.5, "ignore_cache": false }, "search_strategies": [ diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_left.json b/experiment_files/compare_hypertuners_paper_heatmaps_left.json index c32ed6c..f807b8d 100644 --- 
a/experiment_files/compare_hypertuners_paper_heatmaps_left.json +++ b/experiment_files/compare_hypertuners_paper_heatmaps_left.json @@ -1,5 +1,5 @@ { - "version": "1.1.0", + "version": "1.2.0", "name": "Compare hyperparameter tuning", "parent_folder": "./hyperparametertuning_milo", "experimental_groups_defaults": { @@ -38,7 +38,7 @@ "stochastic": true, "repeats": 50, "samples": 32, - "minimum_number_of_valid_search_iterations": 4, + "minimum_fraction_of_budget_valid": 0.5, "ignore_cache": false }, "search_strategies": [ diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_right.json b/experiment_files/compare_hypertuners_paper_heatmaps_right.json index 17aa2d3..5033216 100644 --- a/experiment_files/compare_hypertuners_paper_heatmaps_right.json +++ b/experiment_files/compare_hypertuners_paper_heatmaps_right.json @@ -1,5 +1,5 @@ { - "version": "1.1.0", + "version": "1.2.0", "name": "Compare hyperparameter tuning", "parent_folder": "./hyperparametertuning_milo", "experimental_groups_defaults": { @@ -38,7 +38,7 @@ "stochastic": true, "repeats": 50, "samples": 32, - "minimum_number_of_valid_search_iterations": 4, + "minimum_fraction_of_budget_valid": 0.5, "ignore_cache": false }, "search_strategies": [ diff --git a/experiment_files/compare_meta_algorithms.json b/experiment_files/compare_meta_algorithms.json index 577f662..b24c23f 100644 --- a/experiment_files/compare_meta_algorithms.json +++ b/experiment_files/compare_meta_algorithms.json @@ -1,5 +1,5 @@ { - "version": "1.1.0", + "version": "1.2.0", "name": "Compare hyperparameter tuning meta algorithms", "parent_folder": "./hyperparametertuning_meta", "experimental_groups_defaults": { @@ -19,7 +19,7 @@ "stochastic": true, "repeats": 50, "samples": 32, - "minimum_number_of_valid_search_iterations": 2, + "minimum_fraction_of_budget_valid": 0.5, "ignore_cache": false }, "search_strategies": [ From 1e2b2af9e31b4a2780be9dea643415f31120a72d Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 25 Mar 2025 
11:50:49 +0100 Subject: [PATCH 137/234] Updated hyperparameters --- .../compare_hypertuners_paper.json | 73 ++++++++++++++++--- 1 file changed, 61 insertions(+), 12 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 40eca9d..8c8c78a 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -36,9 +36,9 @@ "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" }, "stochastic": true, - "repeats": 50, + "repeats": 100, "samples": 32, - "minimum_fraction_of_budget_valid": 0.5, + "minimum_fraction_of_budget_valid": 0.1, "ignore_cache": false }, "search_strategies": [ @@ -56,7 +56,7 @@ }, { "name": "method", - "value": "randtobest1bin" + "value": "best1bin" } ], "display_name": "Differential Evolution tuned", @@ -72,11 +72,11 @@ }, { "name": "maxiter", - "value": 100 + "value": 50 }, { "name": "method", - "value": "best1exp" + "value": "best2exp" } ], "display_name": "Differential Evolution untuned", @@ -122,7 +122,7 @@ }, { "name": "method", - "value": "uniform" + "value": "single_point" }, { "name": "mutation_chance", @@ -146,7 +146,7 @@ }, { "name": "method", - "value": "two_point" + "value": "disruptive_uniform" }, { "name": "mutation_chance", @@ -163,11 +163,11 @@ "search_method_hyperparameters": [ { "name": "popsize", - "value": 20 + "value": 30 }, { "name": "maxiter", - "value": 150 + "value": 100 }, { "name": "c1", @@ -175,7 +175,7 @@ }, { "name": "c2", - "value": 1.5 + "value": 0.5 } ], "display_name": "PSO tuned", @@ -191,7 +191,7 @@ }, { "name": "maxiter", - "value": 150 + "value": 50 }, { "name": "c1", @@ -199,12 +199,61 @@ }, { "name": "c2", - "value": 1.5 + "value": 1.0 } ], "display_name": "PSO untuned", "autotuner": "KernelTuner", "color_parent": "pso_tuned" + }, + { + "name": "simulated_annealing_tuned", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "T", + 
"value": 1.5 + }, + { + "name": "T_min", + "value": 0.0001 + }, + { + "name": "alpha", + "value": 0.995 + }, + { + "name": "maxiter", + "value": 1 + } + ], + "display_name": "Simulated Annealing tuned", + "autotuner": "KernelTuner" + }, + { + "name": "simulated_annealing_inv_tuned", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "T", + "value": 1.5 + }, + { + "name": "T_min", + "value": 0.01 + }, + { + "name": "alpha", + "value": 0.995 + }, + { + "name": "maxiter", + "value": 3 + } + ], + "display_name": "Simulated Annealing untuned", + "autotuner": "KernelTuner", + "color_parent": "simulated_annealing_tuned" } ], "statistics_settings": { From b618005606792fb9ddbd115d8dcc1fd5ba6b0d8d Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 25 Mar 2025 14:38:36 +0100 Subject: [PATCH 138/234] Improved error message and extended heatmap color range --- src/autotuning_methodology/visualize_experiments.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index eee9323..696609b 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -367,16 +367,16 @@ def __init__( x_type = plot_x_value_types[0] y_type = plot_y_value_types[0] bins = plot.get("bins", 10) - vmin = plot.get("vmin", -10.0) # color range lower limit + vmin = plot.get("vmin", -15.0) # color range lower limit vmax = plot.get("vmax", 1.0) # color range upper limit cmin = plot.get("cmin", vmin) # colorbar lower limit cmax = plot.get("cmax", vmax) # colorbar upper limit cnum = plot.get("cnum", 5) # number of ticks on the colorbar include_y_labels = plot.get("include_y_labels", None) include_colorbar = plot.get("include_colorbar", True) - if vmin != -10.0: + if vmin != -15.0: warnings.warn( - f"Careful: VMin has been changed from -10.0 to {vmin}. 
This breaks visual comparison compatiblity with plots that do not have the same VMin. Maybe use cmin instead?." + f"Careful: VMin has been changed from -15.0 to {vmin}. This breaks visual comparison compatiblity with plots that do not have the same VMin. Maybe use cmin instead?." ) if vmax != 1.0: warnings.warn( @@ -399,7 +399,7 @@ def norm_color_val(v): cmap = LinearSegmentedColormap.from_list( "my_colormap", [ - (norm_color_val(-10.0), "black"), + (norm_color_val(-15.0), "black"), (norm_color_val(-4.0), "red"), (norm_color_val(-1.0), "orange"), (norm_color_val(0.0), "yellow"), @@ -537,7 +537,7 @@ def norm_color_val(v): outside_range = np.where(np.logical_or(plot_data < vmin, plot_data > vmax)) assert ( len(outside_range[0]) == 0 and len(outside_range[1]) == 0 - ), f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range})" + ), f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range} for strategy {strategy_displayname})" outside_visible_range = np.where(np.logical_or(plot_data < cmin, plot_data > cmax)) if not (len(outside_visible_range[0]) == 0 and len(outside_visible_range[1]) == 0): warnings.warn( @@ -687,7 +687,7 @@ def norm_color_val(v): outside_range = np.where(np.logical_or(plot_data < vmin, plot_data > vmax)) assert ( len(outside_range[0]) == 0 and len(outside_range[1]) == 0 - ), f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range})" + ), f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range} for strategy {strategy_displayname})" else: raise NotImplementedError(f"Invalid {style=}") From 7b8d1eb2307e040b092349fc9a70138ba03b1032 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 25 Mar 2025 14:46:04 +0100 Subject: [PATCH 139/234] Updated dependencies and classifiers --- noxfile.py | 2 +- pyproject.toml | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git 
a/noxfile.py b/noxfile.py index b1e111f..c760720 100644 --- a/noxfile.py +++ b/noxfile.py @@ -20,7 +20,7 @@ def lint(session: nox.Session) -> None: # @nox.session # uncomment this line to only run on the current python interpreter -@nox.session(python=["3.9", "3.10", "3.11", "3.12"]) # missing versions can be installed with `pyenv install ...` +@nox.session(python=["3.9", "3.10", "3.11", "3.12", "3.13"]) # missing versions can be installed with `pyenv install ...` # do not forget check / set the versions with `pyenv global`, or `pyenv local` in case of virtual environment def tests(session: nox.Session) -> None: """Run the tests for the specified Python versions.""" diff --git a/pyproject.toml b/pyproject.toml index c060e48..659a5df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,13 +4,13 @@ requires = ["flit_core >=3.8.0,<4"] [project] # https://packaging.python.org/en/latest/specifications/declaring-project-metadata/#declaring-project-metadata name = "autotuning_methodology" -version = "1.1.0" +version = "1.2.0" authors = [{ name = "Floris-Jan Willemsen", email = "fjwillemsen97@gmail.com" }] description = "Software package easing implementation of the guidelines of the 2024 paper 'A Methodology for Comparing Auto-Tuning Optimization Algorithms' (https://doi.org/10.1016/j.future.2024.05.021). The DOI of this software is https://doi.org/10.5281/zenodo.11243974." 
keywords = ["autotuning", "auto-tuning", "methodology", "scientific"] readme = "README.md" license = { file = "LICENSE" } -requires-python = ">=3.9" +requires-python = ">=3.9" # NOTE when updating python version, also update classifiers and Nox test versions classifiers = [ "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: MIT License", @@ -20,6 +20,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13" ] # https://pypi.org/classifiers/ dependencies = [ "numpy (>=1.22.4,<2.0.0)", @@ -30,7 +31,7 @@ dependencies = [ "progressbar2 >= 4.2.0", "jsonschema >= 4.17.3", "nonconformist >= 2.1.0", - "kernel_tuner >= 1.0.0", + "kernel_tuner >= 1.1.0", ] [project.optional-dependencies] From b1e60acd423dd0235ed7fe88a570d297076332ab Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 25 Mar 2025 14:52:21 +0100 Subject: [PATCH 140/234] Applied linting to modified codebase --- noxfile.py | 4 +++- src/autotuning_methodology/curves.py | 8 ++------ src/autotuning_methodology/experiments.py | 1 - src/autotuning_methodology/runner.py | 15 +++++++-------- .../searchspace_statistics.py | 8 ++------ .../integration/test_visualization.py | 4 +--- 6 files changed, 15 insertions(+), 25 deletions(-) diff --git a/noxfile.py b/noxfile.py index c760720..bf5b957 100644 --- a/noxfile.py +++ b/noxfile.py @@ -20,7 +20,9 @@ def lint(session: nox.Session) -> None: # @nox.session # uncomment this line to only run on the current python interpreter -@nox.session(python=["3.9", "3.10", "3.11", "3.12", "3.13"]) # missing versions can be installed with `pyenv install ...` +@nox.session( + python=["3.9", "3.10", "3.11", "3.12", "3.13"] +) # missing versions can be installed with `pyenv install ...` # do not forget check / set the versions with `pyenv global`, or `pyenv local` in case of virtual environment def tests(session: nox.Session) -> None: 
"""Run the tests for the specified Python versions.""" diff --git a/src/autotuning_methodology/curves.py b/src/autotuning_methodology/curves.py index d191f7e..f1185b6 100644 --- a/src/autotuning_methodology/curves.py +++ b/src/autotuning_methodology/curves.py @@ -37,16 +37,12 @@ def get_indices_in_distribution( # check whether the distribution is correctly ordered if not skip_dist_check: strictly_ascending_sort = dist[:-1] <= dist[1:] - assert np.all( - strictly_ascending_sort - ), f"""Distribution is not sorted ascendingly, + assert np.all(strictly_ascending_sort), f"""Distribution is not sorted ascendingly, {np.count_nonzero(~strictly_ascending_sort)} violations in {len(dist)} values: {dist}""" # check whether each value of draws (excluding NaN) is in dist if not skip_draws_check: - assert np.all( - np.in1d(draws[~np.isnan(draws)], dist) - ), f""" + assert np.all(np.in1d(draws[~np.isnan(draws)], dist)), f""" Each value in draws should be in dist, but {np.size(draws[~np.isnan(draws)][~np.in1d(draws[~np.isnan(draws)], dist)])} values of the {np.size(draws)} are missing: {draws[~np.isnan(draws)][~np.in1d(draws[~np.isnan(draws)], dist)]}""" diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 301b6e9..39515c6 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -482,7 +482,6 @@ def execute_experiment(filepath: str, profiling: bool = False): # just iterate over experimental_groups, collect results and write to proper place for group in all_experimental_groups: - print(f" | - running on GPU '{group['gpu']}'") print(f" | - | tuning application '{group['application_name']}'") print(f" | - | - | with settings of experimental group '{group['display_name']}'") diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index f600fce..b16036b 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ 
-223,25 +223,26 @@ def collect_results( Returns: The ``ResultsDescription`` object with the results. """ - # calculate the minimum number of function evaluations that must be valid minimum_fraction_of_budget_valid = group.get("minimum_fraction_of_budget_valid", None) if minimum_fraction_of_budget_valid is not None: assert isinstance(minimum_fraction_of_budget_valid, float) assert 0.0 < minimum_fraction_of_budget_valid <= 1.0 max_fevals = None - budget = group['budget'] + budget = group["budget"] if "max_fevals" in budget: - max_fevals = budget['max_fevals'] + max_fevals = budget["max_fevals"] elif "time_limit" in budget: - time_limit = budget['time_limit'] + time_limit = budget["time_limit"] time_per_feval = self.searchspace_stats.get_time_per_feval("mean_per_feval") max_fevals = max(round(time_limit / time_per_feval), 2) else: raise ValueError(f"Unkown budget {budget}, can not calculate minimum fraction of budget valid") min_num_evals = max(round(minimum_fraction_of_budget_valid * min(max_fevals, searchspace_stats.size)), 2) if "minimum_number_of_valid_search_iterations" in group: - warnings.warn(f"Both 'minimum_number_of_valid_search_iterations' ({group['minimum_number_of_valid_search_iterations']}) and 'minimum_fraction_of_budget_valid' ({minimum_fraction_of_budget_valid}, {min_num_evals}) are set, the latter takes precedence.") + warnings.warn( + f"Both 'minimum_number_of_valid_search_iterations' ({group['minimum_number_of_valid_search_iterations']}) and 'minimum_fraction_of_budget_valid' ({minimum_fraction_of_budget_valid}, {min_num_evals}) are set, the latter takes precedence." 
+ ) else: min_num_evals: int = group["minimum_number_of_valid_search_iterations"] @@ -427,9 +428,7 @@ def get_nan_array() -> np.ndarray: assert ( len(measurements) > 0 ), f"Objective performance key name {key} not in evaluation['measurements'] ({evaluation_measurements})" - assert ( - len(measurements) == 1 - ), f"""Objective performance key name {key} multiply defined + assert len(measurements) == 1, f"""Objective performance key name {key} multiply defined in evaluation['measurements'] ({evaluation_measurements})""" value = measurements[0]["value"] if value is not None and not is_invalid_objective_performance(value): diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index 675441a..cf66dbf 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -426,9 +426,7 @@ def _load(self) -> bool: assert ( self.objective_times[key].shape[0] == self.size ), f"Should have the same size as results ({self.size}), has {self.objective_times[key].shape[0]}" - assert not np.all( - np.isnan(self.objective_times[key]) - ), f"""All values for {key=} are NaN. + assert not np.all(np.isnan(self.objective_times[key])), f"""All values for {key=} are NaN. Likely the experiment did not collect time values for objective_time_keys '{key}'.""" # get the performance values per configuration @@ -447,9 +445,7 @@ def _load(self) -> bool: self.objective_performances[key].shape[0] == self.size ), f"""Should have the same size as results ({self.size}), has {self.objective_performances[key].shape[0]}""" - assert not np.all( - np.isnan(self.objective_performances[key]) - ), f"""All values for {key=} are NaN. + assert not np.all(np.isnan(self.objective_performances[key])), f"""All values for {key=} are NaN. 
Likely the experiment did not collect performance values for objective_performance_key '{key}'.""" # get the number of repeats diff --git a/tests/autotuning_methodology/integration/test_visualization.py b/tests/autotuning_methodology/integration/test_visualization.py index cd3ef3e..d59cb42 100644 --- a/tests/autotuning_methodology/integration/test_visualization.py +++ b/tests/autotuning_methodology/integration/test_visualization.py @@ -84,6 +84,4 @@ def test_visualize_experiment(): def test_visualized_plot(plot_filepath: Path): """Test whether valid plots have been produced.""" for plot_filepath in plot_filepaths: - assert ( - plot_filepath.exists() - ), f"{plot_filepath} does not exist, files in folder: {[f.name for f in plot_filepath.parent.iterdir() if f.is_file()]}" + assert plot_filepath.exists(), f"{plot_filepath} does not exist, files in folder: {[f.name for f in plot_filepath.parent.iterdir() if f.is_file()]}" From 8ad08f6ec7801d3c43e24189277ad69a0b676209 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 25 Mar 2025 14:53:16 +0100 Subject: [PATCH 141/234] Updated benchmark hub --- benchmark_hub | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark_hub b/benchmark_hub index b3f0d1a..f761720 160000 --- a/benchmark_hub +++ b/benchmark_hub @@ -1 +1 @@ -Subproject commit b3f0d1a32408bfe26df77bb914fb216616342a33 +Subproject commit f761720f99b9019056d0c926a090c3b4b2712ac8 From 9d3b0d1469ba3ab15607b896884a7fefb0395bce Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 25 Mar 2025 14:57:08 +0100 Subject: [PATCH 142/234] Bumped Kernel Tuner version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 659a5df..6e85b98 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ "progressbar2 >= 4.2.0", "jsonschema >= 4.17.3", "nonconformist >= 2.1.0", - "kernel_tuner >= 1.1.0", + "kernel_tuner >= 1.2.0", ] [project.optional-dependencies] From 
4cd939439bd5a206e70ab241ca68d3cd5ad50d39 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Wed, 26 Mar 2025 14:58:23 +0100 Subject: [PATCH 143/234] Updated heatmap experiment files with new values --- ...mpare_hypertuners_paper_heatmaps_left.json | 40 ++++++++++++++---- ...pare_hypertuners_paper_heatmaps_right.json | 41 +++++++++++++++---- 2 files changed, 65 insertions(+), 16 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_left.json b/experiment_files/compare_hypertuners_paper_heatmaps_left.json index f807b8d..e0f78f2 100644 --- a/experiment_files/compare_hypertuners_paper_heatmaps_left.json +++ b/experiment_files/compare_hypertuners_paper_heatmaps_left.json @@ -36,9 +36,9 @@ "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" }, "stochastic": true, - "repeats": 50, + "repeats": 100, "samples": 32, - "minimum_fraction_of_budget_valid": 0.5, + "minimum_fraction_of_budget_valid": 0.1, "ignore_cache": false }, "search_strategies": [ @@ -52,11 +52,11 @@ }, { "name": "maxiter", - "value": 100 + "value": 50 }, { "name": "method", - "value": "best1exp" + "value": "best2exp" } ], "display_name": "Differential Evolution untuned", @@ -88,7 +88,7 @@ }, { "name": "method", - "value": "two_point" + "value": "disruptive_uniform" }, { "name": "mutation_chance", @@ -108,7 +108,7 @@ }, { "name": "maxiter", - "value": 150 + "value": 50 }, { "name": "c1", @@ -116,11 +116,35 @@ }, { "name": "c2", - "value": 1.5 + "value": 1.0 } ], "display_name": "PSO untuned", "autotuner": "KernelTuner" + }, + { + "name": "simulated_annealing_inv_tuned", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "T", + "value": 1.5 + }, + { + "name": "T_min", + "value": 0.01 + }, + { + "name": "alpha", + "value": 0.995 + }, + { + "name": "maxiter", + "value": 3 + } + ], + "display_name": "Simulated Annealing untuned", + "autotuner": "KernelTuner" } ], "statistics_settings": { @@ -146,7 +170,7 @@ "y_axis_value_types": [ 
"gpus" ], - "cmin": -2.5, + "cmin": -4.0, "include_y_labels": true, "include_colorbar": false } diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_right.json b/experiment_files/compare_hypertuners_paper_heatmaps_right.json index 5033216..4b11d05 100644 --- a/experiment_files/compare_hypertuners_paper_heatmaps_right.json +++ b/experiment_files/compare_hypertuners_paper_heatmaps_right.json @@ -36,9 +36,9 @@ "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" }, "stochastic": true, - "repeats": 50, + "repeats": 100, "samples": 32, - "minimum_fraction_of_budget_valid": 0.5, + "minimum_fraction_of_budget_valid": 0.1, "ignore_cache": false }, "search_strategies": [ @@ -56,7 +56,7 @@ }, { "name": "method", - "value": "randtobest1bin" + "value": "best1bin" } ], "display_name": "Differential Evolution tuned", @@ -88,7 +88,7 @@ }, { "name": "method", - "value": "uniform" + "value": "single_point" }, { "name": "mutation_chance", @@ -104,11 +104,11 @@ "search_method_hyperparameters": [ { "name": "popsize", - "value": 20 + "value": 30 }, { "name": "maxiter", - "value": 150 + "value": 100 }, { "name": "c1", @@ -116,11 +116,35 @@ }, { "name": "c2", - "value": 1.5 + "value": 0.5 } ], "display_name": "PSO tuned", "autotuner": "KernelTuner" + }, + { + "name": "simulated_annealing_tuned", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "T", + "value": 1.5 + }, + { + "name": "T_min", + "value": 0.0001 + }, + { + "name": "alpha", + "value": 0.995 + }, + { + "name": "maxiter", + "value": 1 + } + ], + "display_name": "Simulated Annealing tuned", + "autotuner": "KernelTuner" } ], "statistics_settings": { @@ -146,7 +170,8 @@ "y_axis_value_types": [ "gpus" ], - "cmin": -2.5, + "cmin": -4.0, + "cnum": 6, "include_y_labels": false, "include_colorbar": true } From 071d144b445f31934b766462f8810c70a8372dae Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 8 May 2025 16:34:53 +0200 Subject: [PATCH 144/234] 
Improved calculation of stopping point --- benchmark_hub | 2 +- src/autotuning_methodology/curves.py | 20 ++++++++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/benchmark_hub b/benchmark_hub index f761720..921ae1f 160000 --- a/benchmark_hub +++ b/benchmark_hub @@ -1 +1 @@ -Subproject commit f761720f99b9019056d0c926a090c3b4b2712ac8 +Subproject commit 921ae1f7dd9a01942b8f9b06600de79fd21d2708 diff --git a/src/autotuning_methodology/curves.py b/src/autotuning_methodology/curves.py index f1185b6..79df865 100644 --- a/src/autotuning_methodology/curves.py +++ b/src/autotuning_methodology/curves.py @@ -37,12 +37,16 @@ def get_indices_in_distribution( # check whether the distribution is correctly ordered if not skip_dist_check: strictly_ascending_sort = dist[:-1] <= dist[1:] - assert np.all(strictly_ascending_sort), f"""Distribution is not sorted ascendingly, + assert np.all( + strictly_ascending_sort + ), f"""Distribution is not sorted ascendingly, {np.count_nonzero(~strictly_ascending_sort)} violations in {len(dist)} values: {dist}""" # check whether each value of draws (excluding NaN) is in dist if not skip_draws_check: - assert np.all(np.in1d(draws[~np.isnan(draws)], dist)), f""" + assert np.all( + np.in1d(draws[~np.isnan(draws)], dist) + ), f""" Each value in draws should be in dist, but {np.size(draws[~np.isnan(draws)][~np.in1d(draws[~np.isnan(draws)], dist)])} values of the {np.size(draws)} are missing: {draws[~np.isnan(draws)][~np.in1d(draws[~np.isnan(draws)], dist)]}""" @@ -704,12 +708,20 @@ def _get_curve_over_time_values_in_range( times: np.ndarray = _times[nan_mask].reshape(-1, num_repeats) values: np.ndarray = _values[nan_mask].reshape(-1, num_repeats) - # get the highest time of each run of the algorithm, take the median + # find the stopping point times_no_nan = times times_no_nan[np.isnan(values)] = np.nan # to count only valid configurations towards highest time highest_time_per_repeat = np.nanmax(times_no_nan, axis=0) assert 
highest_time_per_repeat.shape[0] == num_repeats - real_stopping_point_time: float = np.nanmedian(highest_time_per_repeat) + highest_time_per_repeat = np.sort(highest_time_per_repeat) + # get the highest time of each run of the algorithm, take the average, stopping point is next highest time + try: + real_stopping_point_time: float = highest_time_per_repeat[ + np.nanmedian(highest_time_per_repeat) < highest_time_per_repeat + ][0] + except IndexError: + # there is no next highest time, so we return the last time in the range + real_stopping_point_time = time_range[-1] # filter to get the time range with a margin on both ends for the isotonic regression time_range_margin_modifier = 0.25 * ( From fe8a23c429bf98dd5fe5cacc17ded5c42a4bdfda Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 9 May 2025 12:28:44 +0200 Subject: [PATCH 145/234] Improved type error message raised when having inner arrays in cache files --- .../searchspace_statistics.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index cf66dbf..88238bc 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -113,11 +113,16 @@ def to_valid_array( # if the value is an array, sum the valid values array = value list_to_sum = list(v for v in array if is_not_invalid_value(v, performance)) - values[value_index] = ( - sum(list_to_sum) - if len(list_to_sum) > 0 and is_not_invalid_value(sum(list_to_sum), performance) - else np.nan - ) + try: + values[value_index] = ( + sum(list_to_sum) + if len(list_to_sum) > 0 and is_not_invalid_value(sum(list_to_sum), performance) + else np.nan + ) + except TypeError as e: + raise TypeError( + f"Invalid type for {key=}, {value=}, {list_to_sum=}, {values=}, {performance=}, {from_time_unit=} ({e})" + ) assert all(isinstance(v, (int, float)) for v in values) return 
np.array(values) From 53125bb941e92abc5f70ce30fd52af1405f88bfe Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 9 May 2025 12:28:58 +0200 Subject: [PATCH 146/234] Improved W7800 cachefiles --- benchmark_hub | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark_hub b/benchmark_hub index 921ae1f..65853fd 160000 --- a/benchmark_hub +++ b/benchmark_hub @@ -1 +1 @@ -Subproject commit 921ae1f7dd9a01942b8f9b06600de79fd21d2708 +Subproject commit 65853fd897b28bb32989db5746684b2881ce021a From d295126fdaa86ce0f7fac368ae9d86ca4303ba40 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 9 May 2025 12:29:13 +0200 Subject: [PATCH 147/234] Added W7800 to experiment files --- experiment_files/compare_hypertuners_paper.json | 3 ++- experiment_files/compare_hypertuners_paper_heatmaps_left.json | 3 ++- experiment_files/compare_hypertuners_paper_heatmaps_right.json | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 8c8c78a..fee6893 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -30,7 +30,8 @@ "A6000", "A100", "A4000", - "MI250X" + "MI250X", + "W7800" ], "pattern_for_full_search_space_filenames": { "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_left.json b/experiment_files/compare_hypertuners_paper_heatmaps_left.json index e0f78f2..0a5c836 100644 --- a/experiment_files/compare_hypertuners_paper_heatmaps_left.json +++ b/experiment_files/compare_hypertuners_paper_heatmaps_left.json @@ -30,7 +30,8 @@ "A6000", "A100", "A4000", - "MI250X" + "MI250X", + "W7800" ], "pattern_for_full_search_space_filenames": { "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_right.json 
b/experiment_files/compare_hypertuners_paper_heatmaps_right.json index 4b11d05..4c5849b 100644 --- a/experiment_files/compare_hypertuners_paper_heatmaps_right.json +++ b/experiment_files/compare_hypertuners_paper_heatmaps_right.json @@ -30,7 +30,8 @@ "A6000", "A100", "A4000", - "MI250X" + "MI250X", + "W7800" ], "pattern_for_full_search_space_filenames": { "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" From ceec2f8b4c688f2530cf38aefc75ec00b8e5ecdf Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sun, 11 May 2025 22:05:10 +0200 Subject: [PATCH 148/234] Added optional visual limit for aggregate plot, changed figure size --- src/autotuning_methodology/visualize_experiments.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 696609b..83ed17e 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -696,6 +696,7 @@ def norm_color_val(v): # get settings scope: str = plot["scope"] style: str = plot["style"] + vmin: float = plot.get("vmin", None) # visual range lower limit if scope != "aggregate": continue if style != "line": @@ -703,7 +704,7 @@ def norm_color_val(v): # plot the aggregation if continue_after_comparison or not (compare_baselines or compare_split_times): fig, axs = plt.subplots( - ncols=1, figsize=(9, 6), dpi=300 + ncols=1, figsize=(7.5, 5), dpi=300 ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. 
if not hasattr(axs, "__len__"): axs = [axs] @@ -717,6 +718,8 @@ def norm_color_val(v): self.plot_strategies_aggregated( axs[0], aggregation_data, plot_settings=self.experiment["visualization_settings"] ) + if vmin is not None: + axs[0].set_ylim(bottom=vmin) fig.tight_layout() if save_figs: filename_path = Path(self.plot_filename_prefix) / "aggregated" From 70977d2425af3c3837247ae20554213d37d7f02e Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sun, 11 May 2025 22:18:14 +0200 Subject: [PATCH 149/234] Added support for dividing between train and test set --- .../compare_hypertuners_paper.json | 60 ------------------- ...mpare_hypertuners_paper_heatmaps_left.json | 4 +- ...pare_hypertuners_paper_heatmaps_right.json | 4 +- .../visualize_experiments.py | 6 +- 4 files changed, 10 insertions(+), 64 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index fee6893..2618e3a 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -206,55 +206,6 @@ "display_name": "PSO untuned", "autotuner": "KernelTuner", "color_parent": "pso_tuned" - }, - { - "name": "simulated_annealing_tuned", - "search_method": "simulated_annealing", - "search_method_hyperparameters": [ - { - "name": "T", - "value": 1.5 - }, - { - "name": "T_min", - "value": 0.0001 - }, - { - "name": "alpha", - "value": 0.995 - }, - { - "name": "maxiter", - "value": 1 - } - ], - "display_name": "Simulated Annealing tuned", - "autotuner": "KernelTuner" - }, - { - "name": "simulated_annealing_inv_tuned", - "search_method": "simulated_annealing", - "search_method_hyperparameters": [ - { - "name": "T", - "value": 1.5 - }, - { - "name": "T_min", - "value": 0.01 - }, - { - "name": "alpha", - "value": 0.995 - }, - { - "name": "maxiter", - "value": 3 - } - ], - "display_name": "Simulated Annealing untuned", - "autotuner": "KernelTuner", - "color_parent": "simulated_annealing_tuned" } ], 
"statistics_settings": { @@ -293,17 +244,6 @@ "baseline" ] }, - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "applications" - ], - "y_axis_value_types": [ - "gpus" - ], - "cmin": -2.5 - }, { "scope": "search_strategy", "style": "heatmap", diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_left.json b/experiment_files/compare_hypertuners_paper_heatmaps_left.json index 0a5c836..d3f08bb 100644 --- a/experiment_files/compare_hypertuners_paper_heatmaps_left.json +++ b/experiment_files/compare_hypertuners_paper_heatmaps_left.json @@ -173,7 +173,9 @@ ], "cmin": -4.0, "include_y_labels": true, - "include_colorbar": false + "include_colorbar": false, + "divide_train_test_axis": "gpus", + "divide_train_test_after_num": 2 } ], "resolution": 1000.0, diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_right.json b/experiment_files/compare_hypertuners_paper_heatmaps_right.json index 4c5849b..2a9d95c 100644 --- a/experiment_files/compare_hypertuners_paper_heatmaps_right.json +++ b/experiment_files/compare_hypertuners_paper_heatmaps_right.json @@ -174,7 +174,9 @@ "cmin": -4.0, "cnum": 6, "include_y_labels": false, - "include_colorbar": true + "include_colorbar": true, + "divide_train_test_axis": "gpus", + "divide_train_test_after_num": 2 } ], "resolution": 1000.0, diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 83ed17e..8afd35b 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -372,6 +372,8 @@ def __init__( cmin = plot.get("cmin", vmin) # colorbar lower limit cmax = plot.get("cmax", vmax) # colorbar upper limit cnum = plot.get("cnum", 5) # number of ticks on the colorbar + divide_train_test_axis = plot.get("divide_train_test", False) # whether to add visual indication for train/test split + divide_train_test_after_num = plot.get("divide_train_test_after_num", False) # 
where to add the visual indication for train/test split include_y_labels = plot.get("include_y_labels", None) include_colorbar = plot.get("include_colorbar", True) if vmin != -15.0: @@ -485,13 +487,13 @@ def norm_color_val(v): label_data = { "gpus": ( list(dict.fromkeys([t[0].replace(remove_from_gpus_label, "") for t in strategy_data])), - "GPUs", + "[train] GPUs [test]" if plot['divide_train_test_axis'] == "gpus" else "GPUs", ), "applications": ( list( dict.fromkeys([t[1].replace(remove_from_applications_label, "") for t in strategy_data]) ), - "Applications", + "[train] Applications [test]" if plot['divide_train_test_axis'] == "applications" else "Applications", ), "searchspaces": ( list( From 3db28cd31e1b0103d0bb847b8308b67e0255c48d Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 12 May 2025 08:17:55 +0200 Subject: [PATCH 150/234] Added drawing a line on heatmap to divide train and test --- src/autotuning_methodology/visualize_experiments.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 8afd35b..ef76dcd 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -569,9 +569,14 @@ def norm_color_val(v): elif include_y_labels is False: axs[0].set_yticks(ticks=np.arange(len(y_ticks))) axs[0].tick_params(labelleft=False) + print(plot_data.shape) hm = axs[0].imshow( - plot_data, vmin=vmin, vmax=vmax, cmap=cmap, interpolation="nearest", aspect="auto" + plot_data, vmin=vmin, vmax=vmax, cmap=cmap, interpolation="nearest", aspect="auto", extent=[0,0,plot_data.shape[0],plot_data.shape[1]] ) + if divide_train_test_after_num is not False: + axs[0].axvline( + x=divide_train_test_after_num, color="black", linestyle="--", linewidth=1.5 + ) # plot the colorbar if include_colorbar is True: From 234b410640e32f597e1fc945ecdecfd733d58183 Mon Sep 17 00:00:00 2001 From: 
Floris-Jan Willemsen Date: Mon, 12 May 2025 10:06:50 +0200 Subject: [PATCH 151/234] Added visual distinction between train and test in heatmaps --- ...mpare_hypertuners_paper_heatmaps_left.json | 2 +- ...pare_hypertuners_paper_heatmaps_right.json | 5 +-- .../visualize_experiments.py | 44 +++++++++++++++---- 3 files changed, 38 insertions(+), 13 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_left.json b/experiment_files/compare_hypertuners_paper_heatmaps_left.json index d3f08bb..e747452 100644 --- a/experiment_files/compare_hypertuners_paper_heatmaps_left.json +++ b/experiment_files/compare_hypertuners_paper_heatmaps_left.json @@ -175,7 +175,7 @@ "include_y_labels": true, "include_colorbar": false, "divide_train_test_axis": "gpus", - "divide_train_test_after_num": 2 + "divide_train_test_after_num": 3 } ], "resolution": 1000.0, diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_right.json b/experiment_files/compare_hypertuners_paper_heatmaps_right.json index 2a9d95c..ba2ec24 100644 --- a/experiment_files/compare_hypertuners_paper_heatmaps_right.json +++ b/experiment_files/compare_hypertuners_paper_heatmaps_right.json @@ -30,8 +30,7 @@ "A6000", "A100", "A4000", - "MI250X", - "W7800" + "MI250X" ], "pattern_for_full_search_space_filenames": { "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" @@ -176,7 +175,7 @@ "include_y_labels": false, "include_colorbar": true, "divide_train_test_axis": "gpus", - "divide_train_test_after_num": 2 + "divide_train_test_after_num": 3 } ], "resolution": 1000.0, diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index ef76dcd..4f076bd 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -372,8 +372,12 @@ def __init__( cmin = plot.get("cmin", vmin) # colorbar lower limit cmax = plot.get("cmax", vmax) # colorbar upper limit cnum = 
plot.get("cnum", 5) # number of ticks on the colorbar - divide_train_test_axis = plot.get("divide_train_test", False) # whether to add visual indication for train/test split - divide_train_test_after_num = plot.get("divide_train_test_after_num", False) # where to add the visual indication for train/test split + divide_train_test_axis = plot.get( + "divide_train_test_axis", False + ) # whether to add visual indication for train/test split + divide_train_test_after_num = plot.get( + "divide_train_test_after_num", False + ) # where to add the visual indication for train/test split include_y_labels = plot.get("include_y_labels", None) include_colorbar = plot.get("include_colorbar", True) if vmin != -15.0: @@ -487,13 +491,21 @@ def norm_color_val(v): label_data = { "gpus": ( list(dict.fromkeys([t[0].replace(remove_from_gpus_label, "") for t in strategy_data])), - "[train] GPUs [test]" if plot['divide_train_test_axis'] == "gpus" else "GPUs", + ( + "[train] GPUs [test]" + if divide_train_test_axis and divide_train_test_axis.lower() == "gpus" + else "GPUs" + ), ), "applications": ( list( dict.fromkeys([t[1].replace(remove_from_applications_label, "") for t in strategy_data]) ), - "[train] Applications [test]" if plot['divide_train_test_axis'] == "applications" else "Applications", + ( + "[train] Applications [test]" + if divide_train_test_axis and divide_train_test_axis.lower() == "applications" + else "Applications" + ), ), "searchspaces": ( list( @@ -571,12 +583,26 @@ def norm_color_val(v): axs[0].tick_params(labelleft=False) print(plot_data.shape) hm = axs[0].imshow( - plot_data, vmin=vmin, vmax=vmax, cmap=cmap, interpolation="nearest", aspect="auto", extent=[0,0,plot_data.shape[0],plot_data.shape[1]] + plot_data, + vmin=vmin, + vmax=vmax, + cmap=cmap, + interpolation="nearest", + aspect="auto", + # extent=[-0.5, plot_data.shape[1] + 0.5, -0.5, plot_data.shape[0] + 0.5], ) - if divide_train_test_after_num is not False: - axs[0].axvline( - x=divide_train_test_after_num, 
color="black", linestyle="--", linewidth=1.5 - ) + if divide_train_test_axis is not False: + # axs[0].set_ylim(plot_data.shape[0] - 0.5, -0.5) # Ensure correct y-axis limits + if x_type == divide_train_test_axis.lower(): + axs[0].axvline( + x=divide_train_test_after_num - 0.5, color="black", linestyle="--", linewidth=1.5 + ) + elif y_type == divide_train_test_axis.lower(): + axs[0].axhline( + y=divide_train_test_after_num - 0.5, color="black", linestyle="--", linewidth=1.5 + ) + else: + raise ValueError(f"{divide_train_test_axis=} not in x ({x_type}) or y ({y_type}) axis") # plot the colorbar if include_colorbar is True: From 059905ca504823b5e0327d3c7a4f40712c3b4d1f Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Mon, 12 May 2025 11:12:24 +0200 Subject: [PATCH 152/234] Added labels to visual distinction between test and train, added real lowest stopping point y-lim for aggregated plot --- .../compare_hypertuners_paper.json | 3 +- ...pare_hypertuners_paper_heatmaps_right.json | 3 +- .../visualize_experiments.py | 98 ++++++++++++++----- 3 files changed, 80 insertions(+), 24 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 2618e3a..1cf2e18 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -257,7 +257,8 @@ }, { "scope": "aggregate", - "style": "line" + "style": "line", + "ylim": "real" } ], "resolution": 1000.0, diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_right.json b/experiment_files/compare_hypertuners_paper_heatmaps_right.json index ba2ec24..9871944 100644 --- a/experiment_files/compare_hypertuners_paper_heatmaps_right.json +++ b/experiment_files/compare_hypertuners_paper_heatmaps_right.json @@ -30,7 +30,8 @@ "A6000", "A100", "A4000", - "MI250X" + "MI250X", + "W7800" ], "pattern_for_full_search_space_filenames": { "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" diff 
--git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 4f076bd..5e0c919 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -344,7 +344,7 @@ def __init__( fig.tight_layout() if save_figs: filename_path = Path(self.plot_filename_prefix) / f"{title}_{x_type}".replace(" ", "_") - fig.savefig(filename_path, dpi=300) + fig.savefig(filename_path, dpi=300, bbox_inches="tight", pad_inches=0.01) print(f"Figure saved to {filename_path}") else: plt.show() @@ -491,21 +491,13 @@ def norm_color_val(v): label_data = { "gpus": ( list(dict.fromkeys([t[0].replace(remove_from_gpus_label, "") for t in strategy_data])), - ( - "[train] GPUs [test]" - if divide_train_test_axis and divide_train_test_axis.lower() == "gpus" - else "GPUs" - ), + "GPUs", ), "applications": ( list( dict.fromkeys([t[1].replace(remove_from_applications_label, "") for t in strategy_data]) ), - ( - "[train] Applications [test]" - if divide_train_test_axis and divide_train_test_axis.lower() == "applications" - else "Applications" - ), + "Applications", ), "searchspaces": ( list( @@ -581,7 +573,6 @@ def norm_color_val(v): elif include_y_labels is False: axs[0].set_yticks(ticks=np.arange(len(y_ticks))) axs[0].tick_params(labelleft=False) - print(plot_data.shape) hm = axs[0].imshow( plot_data, vmin=vmin, @@ -594,13 +585,58 @@ def norm_color_val(v): if divide_train_test_axis is not False: # axs[0].set_ylim(plot_data.shape[0] - 0.5, -0.5) # Ensure correct y-axis limits if x_type == divide_train_test_axis.lower(): + # add the vertical line to the x-axis axs[0].axvline( - x=divide_train_test_after_num - 0.5, color="black", linestyle="--", linewidth=1.5 + x=divide_train_test_after_num - 0.5, color="black", linestyle="--", linewidth=0.8 + ) + # add train and test texts to either side of the x-label + axs[0].text( + x=divide_train_test_after_num - 0.5, + y=-0.5, + s="train", + 
ha="center", + va="top", + fontsize=10, + ) + axs[0].text( + x=divide_train_test_after_num - 0.5, + y=plot_data.shape[0] - 0.5, + s="test", + ha="center", + va="bottom", + fontsize=10, ) elif y_type == divide_train_test_axis.lower(): + # add the horizontal line to the y-axis axs[0].axhline( - y=divide_train_test_after_num - 0.5, color="black", linestyle="--", linewidth=1.5 + y=divide_train_test_after_num - 0.5, color="black", linestyle="--", linewidth=0.8 ) + if include_y_labels is not False: + # add train and test texts to either side of the y-label + x_loc = -0.02 + y_center = 0.5 + axs[0].text( + x=x_loc, + y=y_center - 0.25 - (len("train") * 0.02), + s="train", + color="grey", + fontsize=8.5, + ha="center", + va="center", + rotation=90, + transform=axs[0].transAxes, + ) + axs[0].text( + x=x_loc, + y=y_center + 0.25 + (len("test") * 0.02), + s="test", + color="grey", + fontsize=8.5, + ha="center", + va="center", + rotation=90, + transform=axs[0].transAxes, + ) else: raise ValueError(f"{divide_train_test_axis=} not in x ({x_type}) or y ({y_type}) axis") @@ -635,7 +671,7 @@ def norm_color_val(v): Path(self.plot_filename_prefix) / f"{strategy_name}_heatmap_{'_'.join(plot_x_value_types)}_{'_'.join(plot_y_value_types)}" ) - fig.savefig(filename_path, dpi=300) + fig.savefig(filename_path, dpi=300, bbox_inches="tight", pad_inches=0.01) print(f"Figure saved to {filename_path}") else: plt.show() @@ -748,15 +784,20 @@ def norm_color_val(v): fig.suptitle(title) # finalize the figure and save or display it - self.plot_strategies_aggregated( + lowest_real_y_value = self.plot_strategies_aggregated( axs[0], aggregation_data, plot_settings=self.experiment["visualization_settings"] ) if vmin is not None: - axs[0].set_ylim(bottom=vmin) + if isinstance(vmin, (int, float)): + axs[0].set_ylim(bottom=vmin) + elif vmin == "real": + axs[0].set_ylim(bottom=lowest_real_y_value) + else: + raise NotImplementedError(f"{vmin=} not implemented") fig.tight_layout() if save_figs: filename_path 
= Path(self.plot_filename_prefix) / "aggregated" - fig.savefig(filename_path, dpi=300) + fig.savefig(filename_path, dpi=300, bbox_inches="tight", pad_inches=0.01) print(f"Figure saved to {filename_path}") else: plt.show() @@ -842,7 +883,7 @@ def plot_baselines_comparison( # write to file or show if save_fig: filename_path = Path(self.plot_filename_prefix) / f"{title}_baselines".replace(" ", "_") - plt.savefig(filename_path, dpi=300) + plt.savefig(filename_path, dpi=300, bbox_inches="tight", pad_inches=0.01) print(f"Figure saved to {filename_path}") else: plt.show() @@ -928,7 +969,7 @@ def plot_split_times_comparison( # write to file or show if save_fig: filename_path = Path(self.plot_filename_prefix) / f"{title}_split_times_{x_type}".replace(" ", "_") - plt.savefig(filename_path, dpi=300) + plt.savefig(filename_path, dpi=300, bbox_inches="tight", pad_inches=0.01) print(f"Figure saved to {filename_path}") else: plt.show() @@ -1035,7 +1076,7 @@ def plot_split_times_bar_comparison( # write to file or show if save_fig: filename_path = Path(self.plot_filename_prefix) / f"{title}_split_times_bar".replace(" ", "_") - plt.savefig(filename_path, dpi=300) + plt.savefig(filename_path, dpi=300, bbox_inches="tight", pad_inches=0.01) print(f"Figure saved to {filename_path}") else: plt.show() @@ -1255,13 +1296,16 @@ def plot_strategies_aggregated( ax: plt.Axes, aggregation_data, plot_settings: dict, - ): + ) -> float: """Plots all optimization strategies combined accross search spaces. Args: ax: the axis to plot on. aggregation_data: the aggregated data from the various searchspaces. plot_settings: dictionary of additional plot settings. + + Returns: + The lowest performance value of the real stopping point for all strategies. 
""" # plot the random baseline and absolute optimum ax.axhline(0, label="Calculated baseline", c="black", ls=":") @@ -1280,6 +1324,7 @@ def plot_strategies_aggregated( y_axis_size = strategies_performance[0].shape[0] time_range = np.arange(y_axis_size) plot_errors = True + lowest_real_y_value = 0.0 print("\n-------") print("Quantification of aggregate performance across all search spaces:") for strategy_index, strategy_performance in enumerate(strategies_performance): @@ -1289,6 +1334,14 @@ def plot_strategies_aggregated( color = self.colors[strategy_index] real_stopping_point_fraction = strategies_real_stopping_point_fraction[strategy_index] real_stopping_point_index = round(real_stopping_point_fraction * time_range.shape[0]) + lowest_real_y_value = min( + lowest_real_y_value, + ( + strategy_performance[real_stopping_point_index] + if real_stopping_point_index < time_range.shape[0] + else strategies_performance[time_range.shape[-1]] + ), + ) if real_stopping_point_index <= 0: warnings.warn(f"Stopping point index for {displayname} is at {real_stopping_point_index}") continue @@ -1356,6 +1409,7 @@ def plot_strategies_aggregated( ax.set_ylim(top=1.02) ax.set_xlim((0, y_axis_size)) ax.legend() + return lowest_real_y_value def get_x_axis_label(self, x_type: str, objective_time_keys: list): """Formatter to get the appropriate x-axis label depending on the x-axis type. 
From dd4510f276afc3eb1d8e4e1e9e593e7b505c5e74 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 12 May 2025 16:00:25 +0200 Subject: [PATCH 153/234] Improvements to lowest real value minimum in aggregate plot --- experiment_files/compare_hypertuners_paper.json | 2 +- src/autotuning_methodology/schemas/experiments.json | 7 +++++-- src/autotuning_methodology/visualize_experiments.py | 5 +++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 1cf2e18..183f16f 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -258,7 +258,7 @@ { "scope": "aggregate", "style": "line", - "ylim": "real" + "vmin": "real" } ], "resolution": 1000.0, diff --git a/src/autotuning_methodology/schemas/experiments.json b/src/autotuning_methodology/schemas/experiments.json index 7a10354..7b27b50 100755 --- a/src/autotuning_methodology/schemas/experiments.json +++ b/src/autotuning_methodology/schemas/experiments.json @@ -326,8 +326,11 @@ ] }, "vmin": { - "description": "The minimum value on the heatmap color scale.", - "type": "number", + "description": "The minimum value on the heatmap color scale or aggregate plot.", + "type": [ + "number", + "string" + ], "default": -0.5 }, "vmax": { diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 5e0c919..5133271 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -791,7 +791,7 @@ def norm_color_val(v): if isinstance(vmin, (int, float)): axs[0].set_ylim(bottom=vmin) elif vmin == "real": - axs[0].set_ylim(bottom=lowest_real_y_value) + axs[0].set_ylim(bottom=lowest_real_y_value - (abs(lowest_real_y_value)+1.0) * 0.02) else: raise NotImplementedError(f"{vmin=} not implemented") fig.tight_layout() @@ -1339,9 +1339,10 @@ def 
plot_strategies_aggregated( ( strategy_performance[real_stopping_point_index] if real_stopping_point_index < time_range.shape[0] - else strategies_performance[time_range.shape[-1]] + else strategy_performance[time_range.shape[0] - 1] ), ) + assert isinstance(lowest_real_y_value, (int, float)), f"Invalid {lowest_real_y_value=}" if real_stopping_point_index <= 0: warnings.warn(f"Stopping point index for {displayname} is at {real_stopping_point_index}") continue From 367a2cfab31dfc3bdf59659d5d92159f826b80b3 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 12 May 2025 17:39:32 +0200 Subject: [PATCH 154/234] Removed redundant references to minimization and objective time keys for flexibility --- .../report_experiments.py | 4 ---- .../visualize_experiments.py | 18 ++++++++++-------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/autotuning_methodology/report_experiments.py b/src/autotuning_methodology/report_experiments.py index 19e59a3..b91c64b 100644 --- a/src/autotuning_methodology/report_experiments.py +++ b/src/autotuning_methodology/report_experiments.py @@ -35,7 +35,6 @@ def get_aggregation_data( cutoff_percentile: float, cutoff_percentile_start=0.01, confidence_level=0.95, - minimization: bool = True, time_resolution: int = 1e4, use_strategy_as_baseline=None, ): @@ -47,7 +46,6 @@ def get_aggregation_data( strategies: _description_ results_descriptions: _description_ cutoff_percentile: _description_ - minimization: _description_. Defaults to True. cutoff_percentile_start: _description_. Defaults to 0.01. confidence_level: _description_. Defaults to 0.95. time_resolution: _description_. Defaults to 1e4. 
@@ -201,7 +199,6 @@ def get_strategy_scores(experiment_filepath: str, use_strategy_as_baseline=None) # get the settings experiment_folderpath = experiment["parent_folder_absolute_path"] - minimization: bool = experiment["statistics_settings"]["minimization"] cutoff_percentile: float = experiment["statistics_settings"]["cutoff_percentile"] cutoff_percentile_start: float = experiment["statistics_settings"]["cutoff_percentile_start"] time_resolution: float = experiment["visualization_settings"]["resolution"] @@ -218,7 +215,6 @@ def get_agg_data(): cutoff_percentile, cutoff_percentile_start, confidence_level, - minimization, time_resolution, use_strategy_as_baseline, ) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 5133271..58816d9 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -173,7 +173,6 @@ def __init__( # self.all_experimental_groups are all combinations of gpu+application+search method that got executed self.strategies = self.experiment["search_strategies"] # settings - self.minimization: bool = self.experiment["statistics_settings"]["minimization"] cutoff_percentile: float = self.experiment["statistics_settings"]["cutoff_percentile"] cutoff_percentile_start: float = self.experiment["statistics_settings"]["cutoff_percentile_start"] cutoff_type: str = self.experiment["statistics_settings"]["cutoff_type"] @@ -208,7 +207,6 @@ def __init__( cutoff_percentile, cutoff_percentile_start, confidence_level, - self.minimization, time_resolution, use_strategy_as_baseline, ) @@ -615,10 +613,11 @@ def norm_color_val(v): # add train and test texts to either side of the y-label x_loc = -0.02 y_center = 0.5 + text = "train" axs[0].text( x=x_loc, - y=y_center - 0.25 - (len("train") * 0.02), - s="train", + y=y_center + 0.25 + (len(text) * 0.01), + s=text, color="grey", fontsize=8.5, ha="center", @@ -626,10 +625,11 @@ def 
norm_color_val(v): rotation=90, transform=axs[0].transAxes, ) + text = "test" axs[0].text( x=x_loc, - y=y_center + 0.25 + (len("test") * 0.02), - s="test", + y=y_center - 0.25 - (len(text) * 0.01), + s=text, color="grey", fontsize=8.5, ha="center", @@ -1284,8 +1284,10 @@ def normalize_multiple(curves: list) -> tuple: ax.set_ylabel(self.y_metric_displayname[f"objective_{y_type}"], fontsize="large") normalized_ylim_margin = 0.02 if y_type == "absolute": - multiplier = 0.99 if self.minimization else 1.01 - ax.set_ylim(absolute_optimum * multiplier, median) + # multiplier = 0.99 if self.minimization else 1.01 + # ax.set_ylim(absolute_optimum * multiplier, median) + # ax.set_ylim(1.0) + pass # elif y_type == 'normalized': # ax.set_ylim((0.0, 1 + normalized_ylim_margin)) elif y_type == "baseline": From 6c0bce9dd7140c9196a7cbde461ddf727961050e Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 12 May 2025 17:40:01 +0200 Subject: [PATCH 155/234] Updated experiments defaults with new benchmark hub data --- .../experiments_defaults.json | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/autotuning_methodology/experiments_defaults.json b/src/autotuning_methodology/experiments_defaults.json index 44555f8..820b807 100644 --- a/src/autotuning_methodology/experiments_defaults.json +++ b/src/autotuning_methodology/experiments_defaults.json @@ -5,19 +5,30 @@ "experimental_groups_defaults": { "applications": [ { - "name": "convolution", + "name": "dedispersion_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "convolution.json" + "input_file": "dedispersion_milo.json" }, { - "name": "pnpoly", + "name": "hotspot_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "pnpoly.json" + "input_file": "hotspot_milo.json" + }, + { + "name": "convolution_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "convolution_milo.json" + }, + { + "name": 
"gemm_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "gemm_milo.json" } ], "gpus": [ - "RTX_3090", - "RTX_2080_Ti" + "A100", + "A4000", + "MI250X" ], "pattern_for_full_search_space_filenames": { "regex": "../autotuning_methodology/benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" From ebf28751d73d048828d55255b5dba8b956b184f7 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 12 May 2025 18:36:39 +0200 Subject: [PATCH 156/234] Created a new interface for reading and writing common input and output formats --- .../formats_interface.py | 23 +++ .../searchspace_statistics.py | 194 ++++++++---------- 2 files changed, 112 insertions(+), 105 deletions(-) create mode 100644 src/autotuning_methodology/formats_interface.py diff --git a/src/autotuning_methodology/formats_interface.py b/src/autotuning_methodology/formats_interface.py new file mode 100644 index 0000000..75f61c8 --- /dev/null +++ b/src/autotuning_methodology/formats_interface.py @@ -0,0 +1,23 @@ +"""Interface for reading and writing input and output formats.""" + +def load_T4_format(file_path: Path, validate: True) -> dict: + """Load and optionally validate a T4 format file.""" + with open(filepath, "r", encoding="utf-8") as fh: + # get the cache from the .json file + orig_contents = fh.read() + try: + data: dict = json.loads(orig_contents) + except json.decoder.JSONDecodeError: + contents = orig_contents[:-1] + "}\n}" + try: + data = json.loads(contents) + except json.decoder.JSONDecodeError: + contents = orig_contents[:-2] + "}\n}" + data = json.loads(contents) + + if validate: + # validate it is in T4 format + validate_T4(data) + + # return the T4 data + return data \ No newline at end of file diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index 88238bc..eb44c39 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ 
-10,6 +10,7 @@ import matplotlib.pyplot as plt import numpy as np +from autotuning_methodology.formats_interface import load_T4_format from autotuning_methodology.validators import is_invalid_objective_performance, is_invalid_objective_time, validate_T4 @@ -397,114 +398,97 @@ def _load(self) -> bool: """Load the contents of the full search space file.""" # if not, use a script to create a file with values from KTT output and formatting of KernelTuner filepath = self.get_valid_filepath() - with open(filepath, "r", encoding="utf-8") as fh: - print(f"Loading full search space file {filepath} and initializing the statistics...") - - # get the cache from the .json file - orig_contents = fh.read() - try: - data: dict = json.loads(orig_contents) - except json.decoder.JSONDecodeError: - contents = orig_contents[:-1] + "}\n}" - try: - data = json.loads(contents) - except json.decoder.JSONDecodeError: - contents = orig_contents[:-2] + "}\n}" - data = json.loads(contents) - - # validate it is in T4 format - validate_T4(data) - - metadata: dict = data.get("metadata", {}) - timeunit = metadata.get("timeunit", "seconds") - results: dict = data["results"] - self.results = results - - # get the time values per configuration - self.size = len(data["results"]) - self.objective_times = dict() - for key in self.objective_time_keys: - self.objective_times[key] = to_valid_array(results, key, performance=False, from_time_unit=timeunit) - assert ( - self.objective_times[key].ndim == 1 - ), f"Should have one dimension, has {self.objective_times[key].ndim}" - assert ( - self.objective_times[key].shape[0] == self.size - ), f"Should have the same size as results ({self.size}), has {self.objective_times[key].shape[0]}" - assert not np.all(np.isnan(self.objective_times[key])), f"""All values for {key=} are NaN. 
- Likely the experiment did not collect time values for objective_time_keys '{key}'.""" - - # get the performance values per configuration - self.objective_performances = dict() - for key in self.objective_performance_keys: - self.objective_performances[key] = to_valid_array( - results, - key, - performance=True, - replace_missing_measurement_from_times_key="runtimes" if key == "time" else None, - ) - assert ( - self.objective_performances[key].ndim == 1 - ), f"Should have one dimension, has {self.objective_performances[key].ndim}" - assert ( - self.objective_performances[key].shape[0] == self.size - ), f"""Should have the same size as results ({self.size}), - has {self.objective_performances[key].shape[0]}""" - assert not np.all(np.isnan(self.objective_performances[key])), f"""All values for {key=} are NaN. - Likely the experiment did not collect performance values for objective_performance_key '{key}'.""" - - # get the number of repeats - # TODO is this necessary? number of repeats is given in experiments setup file - # valid_cache_index: int = 0 - # while "times" not in cache_values[valid_cache_index]: - # valid_cache_index += 1 - # self.repeats = len(cache_values[valid_cache_index]["times"]) - - # combine the arrays to the shape [len(objective_keys), self.size] - self.objective_times_array = np.array(list(self.objective_times[key] for key in self.objective_time_keys)) - assert self.objective_times_array.shape == tuple([len(self.objective_time_keys), self.size]) - self.objective_performances_array = np.array( - list(self.objective_performances[key] for key in self.objective_performance_keys) - ) - assert self.objective_performances_array.shape == tuple([len(self.objective_performance_keys), self.size]) - - # get the totals - self.objective_times_total = nansumwrapper(self.objective_times_array, axis=0) - assert self.objective_times_total.shape == tuple([self.size]) - # more of a test than a necessary assert + data = load_T4_format(filepath, validate=True) + 
metadata: dict = data.get("metadata", {}) + timeunit = metadata.get("timeunit", "seconds") + results: dict = data["results"] + self.results = results + + # get the time values per configuration + self.size = len(data["results"]) + self.objective_times = dict() + for key in self.objective_time_keys: + self.objective_times[key] = to_valid_array(results, key, performance=False, from_time_unit=timeunit) assert ( - np.nansum(self.objective_times_array[:, 0]) == self.objective_times_total[0] - ), f"""Sums of objective performances do not match: - {np.nansum(self.objective_times_array[:, 0])} vs. {self.objective_times_total[0]}""" - self.objective_performances_total = nansumwrapper(self.objective_performances_array, axis=0) - assert self.objective_performances_total.shape == tuple([self.size]) - # more of a test than a necessary assert + self.objective_times[key].ndim == 1 + ), f"Should have one dimension, has {self.objective_times[key].ndim}" assert ( - np.nansum(self.objective_performances_array[:, 0]) == self.objective_performances_total[0] - ), f"""Sums of objective performances do not match: - {np.nansum(self.objective_performances_array[:, 0])} vs. 
{self.objective_performances_total[0]}""" - - # sort - self.objective_times_total_sorted = np.sort( - self.objective_times_total[~np.isnan(self.objective_times_total)] - ) - self.objective_times_number_of_nan = ( - self.objective_times_total.shape[0] - self.objective_times_total_sorted.shape[0] - ) - objective_performances_nan_mask = np.isnan(self.objective_performances_total) - self.objective_performances_number_of_nan = np.count_nonzero(objective_performances_nan_mask) - self.objective_performances_total_sorted = np.sort( - self.objective_performances_total[~objective_performances_nan_mask] - ) - # make sure the best values are at the start, because NaNs are appended to the end - sorted_best_first = ( - self.objective_performances_total_sorted - if self.minimization - else self.objective_performances_total_sorted[::-1] - ) - self.objective_performances_total_sorted_nan = np.concatenate( - (sorted_best_first, [np.nan] * self.objective_performances_number_of_nan) + self.objective_times[key].shape[0] == self.size + ), f"Should have the same size as results ({self.size}), has {self.objective_times[key].shape[0]}" + assert not np.all(np.isnan(self.objective_times[key])), f"""All values for {key=} are NaN. 
+ Likely the experiment did not collect time values for objective_time_keys '{key}'.""" + + # get the performance values per configuration + self.objective_performances = dict() + for key in self.objective_performance_keys: + self.objective_performances[key] = to_valid_array( + results, + key, + performance=True, + replace_missing_measurement_from_times_key="runtimes" if key == "time" else None, ) + assert ( + self.objective_performances[key].ndim == 1 + ), f"Should have one dimension, has {self.objective_performances[key].ndim}" + assert ( + self.objective_performances[key].shape[0] == self.size + ), f"""Should have the same size as results ({self.size}), + has {self.objective_performances[key].shape[0]}""" + assert not np.all(np.isnan(self.objective_performances[key])), f"""All values for {key=} are NaN. + Likely the experiment did not collect performance values for objective_performance_key '{key}'.""" + + # get the number of repeats + # TODO is this necessary? number of repeats is given in experiments setup file + # valid_cache_index: int = 0 + # while "times" not in cache_values[valid_cache_index]: + # valid_cache_index += 1 + # self.repeats = len(cache_values[valid_cache_index]["times"]) + + # combine the arrays to the shape [len(objective_keys), self.size] + self.objective_times_array = np.array(list(self.objective_times[key] for key in self.objective_time_keys)) + assert self.objective_times_array.shape == tuple([len(self.objective_time_keys), self.size]) + self.objective_performances_array = np.array( + list(self.objective_performances[key] for key in self.objective_performance_keys) + ) + assert self.objective_performances_array.shape == tuple([len(self.objective_performance_keys), self.size]) + + # get the totals + self.objective_times_total = nansumwrapper(self.objective_times_array, axis=0) + assert self.objective_times_total.shape == tuple([self.size]) + # more of a test than a necessary assert + assert ( + np.nansum(self.objective_times_array[:, 0]) 
== self.objective_times_total[0] + ), f"""Sums of objective performances do not match: + {np.nansum(self.objective_times_array[:, 0])} vs. {self.objective_times_total[0]}""" + self.objective_performances_total = nansumwrapper(self.objective_performances_array, axis=0) + assert self.objective_performances_total.shape == tuple([self.size]) + # more of a test than a necessary assert + assert ( + np.nansum(self.objective_performances_array[:, 0]) == self.objective_performances_total[0] + ), f"""Sums of objective performances do not match: + {np.nansum(self.objective_performances_array[:, 0])} vs. {self.objective_performances_total[0]}""" + + # sort + self.objective_times_total_sorted = np.sort( + self.objective_times_total[~np.isnan(self.objective_times_total)] + ) + self.objective_times_number_of_nan = ( + self.objective_times_total.shape[0] - self.objective_times_total_sorted.shape[0] + ) + objective_performances_nan_mask = np.isnan(self.objective_performances_total) + self.objective_performances_number_of_nan = np.count_nonzero(objective_performances_nan_mask) + self.objective_performances_total_sorted = np.sort( + self.objective_performances_total[~objective_performances_nan_mask] + ) + # make sure the best values are at the start, because NaNs are appended to the end + sorted_best_first = ( + self.objective_performances_total_sorted + if self.minimization + else self.objective_performances_total_sorted[::-1] + ) + self.objective_performances_total_sorted_nan = np.concatenate( + (sorted_best_first, [np.nan] * self.objective_performances_number_of_nan) + ) return True From 06c09ce24b4c07256caa35148b02e8622deb324b Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 12 May 2025 21:49:41 +0200 Subject: [PATCH 157/234] Implemented automatic derivation of the optimization objective and direction --- src/autotuning_methodology/experiments.py | 37 ++++++++++++++++--- .../schemas/experiments.json | 14 +++++++ 2 files changed, 46 insertions(+), 5 deletions(-) diff --git 
a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 39515c6..f36fa70 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -17,6 +17,7 @@ from autotuning_methodology.runner import collect_results from autotuning_methodology.searchspace_statistics import SearchspaceStatistics from autotuning_methodology.validators import validate_experimentsfile +from autotuning_methodology.utils import load_T4_format PACKAGE_ROOT = Path(__file__).parent.parent.parent @@ -105,7 +106,7 @@ def get_experimental_groups(experiment: dict) -> list[dict]: # set up the directory structure experiment["parent_folder_absolute_path"] = Path(experiment["parent_folder"]).resolve() - # if folder "run" does not exist, create + # if folders "run" and "setup" do not exist, create makedirs(experiment["parent_folder_absolute_path"].joinpath("run"), exist_ok=True) makedirs(experiment["parent_folder_absolute_path"].joinpath("setup"), exist_ok=True) @@ -209,6 +210,28 @@ def generate_all_experimental_groups( gpu, application["name"], ) + + # get the objective performance keys + if "objective_performance_keys" in application: + group["objective_performance_keys"] = application["objective_performance_keys"] + else: + # load the full search space file and derive the objective performance keys + data = load_T4_format(group["full_search_space_file"], validate=True) + objectives = data["results"][0]["objectives"] + assert len(objectives) == 1, "Only one objective is supported for now" + group["objective_performance_keys"] = objectives[0] + + # derive the optimization direction + if "minimization" in application: + group["minimization"] = application["minimization"] + elif "time" in objective: + group["minimization"] = True + elif any(k in objectives[0].lower() for k in ["score", "gflop/s", "gflops", "gb/s"]): + group["minimization"] = False + else: + raise NotImplementedError( + f"Optimization direction can not be 
automatically inferred from '{objective=}' ({gpu=}, {application=}, {strategy=}). Please set 'minimization' for this application in the experiments file." + ) if group["autotuner"] == "KTT": raise NotImplementedError( @@ -486,6 +509,10 @@ def execute_experiment(filepath: str, profiling: bool = False): print(f" | - | tuning application '{group['application_name']}'") print(f" | - | - | with settings of experimental group '{group['display_name']}'") + # get the experimental group settings # TODO + minimization = experiment["statistics_settings"]["minimization"] + objective_performance_keys = experiment["statistics_settings"]["objective_performance_keys"] + # create SearchspaceStatistics for full search space file associated with this group, if it does not exist if any( searchspace_statistics.get(group["gpu"], {}).get(group["application_name"], {}) == null_val @@ -500,9 +527,9 @@ def execute_experiment(filepath: str, profiling: bool = False): searchspace_statistics[group["gpu"]][group["application_name"]] = SearchspaceStatistics( application_name=group["application_name"], device_name=group["gpu"], - minimization=experiment["statistics_settings"]["minimization"], + minimization=minimization, objective_time_keys=objective_time_keys, - objective_performance_keys=experiment["statistics_settings"]["objective_performance_keys"], + objective_performance_keys=objective_performance_keys, full_search_space_file_path=full_search_space_file_path, ) @@ -519,8 +546,8 @@ def execute_experiment(filepath: str, profiling: bool = False): group_display_name=group["display_name"], stochastic=group["stochastic"], objective_time_keys=objective_time_keys, - objective_performance_keys=experiment["statistics_settings"]["objective_performance_keys"], - minimization=experiment["statistics_settings"]["minimization"], + objective_performance_keys=objective_performance_keys, + minimization=minimization, ) # if the strategy is in the cache, use cached data diff --git 
a/src/autotuning_methodology/schemas/experiments.json b/src/autotuning_methodology/schemas/experiments.json index 7b27b50..b2472f5 100755 --- a/src/autotuning_methodology/schemas/experiments.json +++ b/src/autotuning_methodology/schemas/experiments.json @@ -61,6 +61,20 @@ }, "input_file": { "type": "string" + }, + "minimization": { + "description": "Whether the optimization objective is to minimize (maximize if false).", + "type": "boolean", + "default": true + }, + "objective_performance_keys": { + "description": "The performance key(s) to use as the performance objective. In case of multiple keys, the values are summed.", + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true, + "minItems": 1 } } } From 9a5ba308ce7068a63f90e721a72e33838249bf71 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 12 May 2025 21:56:48 +0200 Subject: [PATCH 158/234] Removed 'minimization'and 'objective_performance_keys' from experiment specifications to infer automatically --- .../experiments_defaults.json | 4 ---- .../schemas/experiments.json | 18 +----------------- .../integration/mockfiles/test.json | 4 ---- .../mockfiles/test_bad_kernel_path.json | 4 ---- .../integration/mockfiles/test_cached.json | 4 ---- .../mockfiles/test_import_runs.json | 4 ---- 6 files changed, 1 insertion(+), 37 deletions(-) diff --git a/src/autotuning_methodology/experiments_defaults.json b/src/autotuning_methodology/experiments_defaults.json index 820b807..d89bfc2 100644 --- a/src/autotuning_methodology/experiments_defaults.json +++ b/src/autotuning_methodology/experiments_defaults.json @@ -48,15 +48,11 @@ } ], "statistics_settings": { - "minimization": true, "cutoff_percentile": 0.95, "cutoff_percentile_start": 0.5, "cutoff_type": "fevals", "objective_time_keys": [ "all" - ], - "objective_performance_keys": [ - "time" ] }, "visualization_settings": { diff --git a/src/autotuning_methodology/schemas/experiments.json b/src/autotuning_methodology/schemas/experiments.json index 
b2472f5..96acffe 100755 --- a/src/autotuning_methodology/schemas/experiments.json +++ b/src/autotuning_methodology/schemas/experiments.json @@ -242,19 +242,12 @@ "description": "Settings for the statistics calculation", "type": "object", "required": [ - "minimization", "cutoff_percentile", "cutoff_percentile_start", "cutoff_type", - "objective_time_keys", - "objective_performance_keys" + "objective_time_keys" ], "properties": { - "minimization": { - "description": "Whether the optimization aims to minimize or maximize", - "type": "boolean", - "default": true - }, "cutoff_percentile": { "description": "Fraction of difference between median and absolute optimum at which to stop the time range", "type": "number", @@ -290,15 +283,6 @@ }, "uniqueItems": true, "minItems": 1 - }, - "objective_performance_keys": { - "description": "The performance key(s) to use as the performance objective. In case of multiple keys, the values are summed.", - "type": "array", - "items": { - "type": "string" - }, - "uniqueItems": true, - "minItems": 1 } } }, diff --git a/tests/autotuning_methodology/integration/mockfiles/test.json b/tests/autotuning_methodology/integration/mockfiles/test.json index 4a46737..5b48084 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test.json +++ b/tests/autotuning_methodology/integration/mockfiles/test.json @@ -32,15 +32,11 @@ } ], "statistics_settings": { - "minimization": true, "cutoff_percentile": 0.99, "cutoff_percentile_start": 0.7, "cutoff_type": "fevals", "objective_time_keys": [ "all" - ], - "objective_performance_keys": [ - "time" ] }, "visualization_settings": { diff --git a/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json b/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json index 03af168..d1c9fe6 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json +++ b/tests/autotuning_methodology/integration/mockfiles/test_bad_kernel_path.json @@ -31,15 
+31,11 @@ } ], "statistics_settings": { - "minimization": true, "cutoff_percentile": 0.99, "cutoff_percentile_start": 0.7, "cutoff_type": "fevals", "objective_time_keys": [ "all" - ], - "objective_performance_keys": [ - "time" ] }, "visualization_settings": { diff --git a/tests/autotuning_methodology/integration/mockfiles/test_cached.json b/tests/autotuning_methodology/integration/mockfiles/test_cached.json index 39ef98c..86412fa 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test_cached.json +++ b/tests/autotuning_methodology/integration/mockfiles/test_cached.json @@ -31,15 +31,11 @@ } ], "statistics_settings": { - "minimization": true, "cutoff_percentile": 0.99, "cutoff_percentile_start": 0.7, "cutoff_type": "fevals", "objective_time_keys": [ "all" - ], - "objective_performance_keys": [ - "time" ] }, "visualization_settings": { diff --git a/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json b/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json index c195a69..4cdaabe 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json +++ b/tests/autotuning_methodology/integration/mockfiles/test_import_runs.json @@ -31,15 +31,11 @@ } ], "statistics_settings": { - "minimization": true, "cutoff_percentile": 0.99, "cutoff_percentile_start": 0.7, "cutoff_type": "fevals", "objective_time_keys": [ "all" - ], - "objective_performance_keys": [ - "time" ] }, "visualization_settings": { From 3b34023b0fb9cba56f4361368a089ef9141954fd Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 12 May 2025 22:13:34 +0200 Subject: [PATCH 159/234] Various improvements and fixes related to flexible objective and optimization direction --- src/autotuning_methodology/experiments.py | 30 ++++++++++--------- .../formats_interface.py | 6 +++- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index f36fa70..b1d7848 
100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -17,7 +17,7 @@ from autotuning_methodology.runner import collect_results from autotuning_methodology.searchspace_statistics import SearchspaceStatistics from autotuning_methodology.validators import validate_experimentsfile -from autotuning_methodology.utils import load_T4_format +from autotuning_methodology.formats_interface import load_T4_format PACKAGE_ROOT = Path(__file__).parent.parent.parent @@ -219,12 +219,12 @@ def generate_all_experimental_groups( data = load_T4_format(group["full_search_space_file"], validate=True) objectives = data["results"][0]["objectives"] assert len(objectives) == 1, "Only one objective is supported for now" - group["objective_performance_keys"] = objectives[0] + group["objective_performance_keys"] = objectives # derive the optimization direction if "minimization" in application: group["minimization"] = application["minimization"] - elif "time" in objective: + elif "time" in objectives[0].lower(): group["minimization"] = True elif any(k in objectives[0].lower() for k in ["score", "gflop/s", "gflops", "gb/s"]): group["minimization"] = False @@ -505,14 +505,16 @@ def execute_experiment(filepath: str, profiling: bool = False): # just iterate over experimental_groups, collect results and write to proper place for group in all_experimental_groups: - print(f" | - running on GPU '{group['gpu']}'") - print(f" | - | tuning application '{group['application_name']}'") + # get the experimental group settings + application_name = group["application_name"] + gpu_name = group["gpu"] + minimization = group["minimization"] + objective_performance_keys = group["objective_performance_keys"] + + print(f" | - running on GPU '{gpu_name}'") + print(f" | - | tuning application '{application_name}'") print(f" | - | - | with settings of experimental group '{group['display_name']}'") - # get the experimental group settings # TODO - minimization = 
experiment["statistics_settings"]["minimization"] - objective_performance_keys = experiment["statistics_settings"]["objective_performance_keys"] - # create SearchspaceStatistics for full search space file associated with this group, if it does not exist if any( searchspace_statistics.get(group["gpu"], {}).get(group["application_name"], {}) == null_val @@ -524,9 +526,9 @@ def execute_experiment(filepath: str, profiling: bool = False): else: full_search_space_file_path = group["converted_full_search_space_file"] - searchspace_statistics[group["gpu"]][group["application_name"]] = SearchspaceStatistics( - application_name=group["application_name"], - device_name=group["gpu"], + searchspace_statistics[gpu_name][application_name] = SearchspaceStatistics( + application_name=application_name, + device_name=gpu_name, minimization=minimization, objective_time_keys=objective_time_keys, objective_performance_keys=objective_performance_keys, @@ -540,8 +542,8 @@ def execute_experiment(filepath: str, profiling: bool = False): results_description = ResultsDescription( run_folder=experiment_folderpath / "run" / group["name"], - application_name=group["application_name"], - device_name=group["gpu"], + application_name=application_name, + device_name=gpu_name, group_name=group["name"], group_display_name=group["display_name"], stochastic=group["stochastic"], diff --git a/src/autotuning_methodology/formats_interface.py b/src/autotuning_methodology/formats_interface.py index 75f61c8..571b544 100644 --- a/src/autotuning_methodology/formats_interface.py +++ b/src/autotuning_methodology/formats_interface.py @@ -1,6 +1,10 @@ """Interface for reading and writing input and output formats.""" -def load_T4_format(file_path: Path, validate: True) -> dict: +import json +from pathlib import Path +from autotuning_methodology.validators import validate_T4 + +def load_T4_format(filepath: Path, validate: True) -> dict: """Load and optionally validate a T4 format file.""" with open(filepath, "r", 
encoding="utf-8") as fh: # get the cache from the .json file From 2ae7f50856290d1a6858b070e50f5acc9b1275cf Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 13 May 2025 00:47:23 +0200 Subject: [PATCH 160/234] Various major improvements towards independence of optimization key and objective --- .../compare_hypertuners_paper.json | 21 +++++++++++++++---- src/autotuning_methodology/experiments.py | 11 ++++++++-- .../experiments_defaults.json | 21 +++++++++++++++---- src/autotuning_methodology/runner.py | 7 ++++--- .../searchspace_statistics.py | 13 ++++++++---- 5 files changed, 56 insertions(+), 17 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 183f16f..5f9210d 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -7,22 +7,34 @@ { "name": "dedispersion_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "dedispersion_milo.json" + "input_file": "dedispersion_milo.json", + "objective_performance_keys": [ + "time" + ] }, { "name": "hotspot_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "hotspot_milo.json" + "input_file": "hotspot_milo.json", + "objective_performance_keys": [ + "GFLOP/s" + ] }, { "name": "convolution_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "convolution_milo.json" + "input_file": "convolution_milo.json", + "objective_performance_keys": [ + "time" + ] }, { "name": "gemm_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "gemm_milo.json" + "input_file": "gemm_milo.json", + "objective_performance_keys": [ + "time" + ] } ], "gpus": [ @@ -40,6 +52,7 @@ "repeats": 100, "samples": 32, "minimum_fraction_of_budget_valid": 0.1, + "minimum_number_of_valid_search_iterations": 50, "ignore_cache": false }, "search_strategies": [ diff --git 
a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index b1d7848..f19017a 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -216,17 +216,19 @@ def generate_all_experimental_groups( group["objective_performance_keys"] = application["objective_performance_keys"] else: # load the full search space file and derive the objective performance keys + print(f"Loading full search space file {group['full_search_space_file']} to infer the objective performance keys. Consider setting them explicititely in the experiments file.") data = load_T4_format(group["full_search_space_file"], validate=True) objectives = data["results"][0]["objectives"] assert len(objectives) == 1, "Only one objective is supported for now" group["objective_performance_keys"] = objectives + objective = group["objective_performance_keys"][0] # derive the optimization direction if "minimization" in application: group["minimization"] = application["minimization"] - elif "time" in objectives[0].lower(): + elif "time" in objective.lower(): group["minimization"] = True - elif any(k in objectives[0].lower() for k in ["score", "gflop/s", "gflops", "gb/s"]): + elif any(k in objective.lower() for k in ["score", "gflop/s", "gflops", "gb/s"]): group["minimization"] = False else: raise NotImplementedError( @@ -511,10 +513,15 @@ def execute_experiment(filepath: str, profiling: bool = False): minimization = group["minimization"] objective_performance_keys = group["objective_performance_keys"] + # show the progress in the console print(f" | - running on GPU '{gpu_name}'") print(f" | - | tuning application '{application_name}'") print(f" | - | - | with settings of experimental group '{group['display_name']}'") + # overwrite the experiment statistics settings with the group settings + experiment["statistics_settings"]["minimization"] = minimization + experiment["statistics_settings"]["objective_performance_keys"] = 
objective_performance_keys + # create SearchspaceStatistics for full search space file associated with this group, if it does not exist if any( searchspace_statistics.get(group["gpu"], {}).get(group["application_name"], {}) == null_val diff --git a/src/autotuning_methodology/experiments_defaults.json b/src/autotuning_methodology/experiments_defaults.json index d89bfc2..e473573 100644 --- a/src/autotuning_methodology/experiments_defaults.json +++ b/src/autotuning_methodology/experiments_defaults.json @@ -7,22 +7,34 @@ { "name": "dedispersion_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "dedispersion_milo.json" + "input_file": "dedispersion_milo.json", + "objective_performance_keys": [ + "time" + ] }, { "name": "hotspot_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "hotspot_milo.json" + "input_file": "hotspot_milo.json", + "objective_performance_keys": [ + "GFLOP/s" + ] }, { "name": "convolution_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "convolution_milo.json" + "input_file": "convolution_milo.json", + "objective_performance_keys": [ + "time" + ] }, { "name": "gemm_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "gemm_milo.json" + "input_file": "gemm_milo.json", + "objective_performance_keys": [ + "time" + ] } ], "gpus": [ @@ -37,6 +49,7 @@ "repeats": 25, "samples": 32, "minimum_fraction_of_budget_valid": 0.5, + "minimum_number_of_valid_search_iterations": 25, "ignore_cache": false }, "search_strategies": [ diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index b16036b..15afee9 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -158,8 +158,8 @@ def tune_with_kerneltuner(): warnings.warn( f"Much fewer configurations were returned ({num_results}) than the requested {max_fevals}" ) - if num_results < 2: - raise ValueError("Less than 
two configurations were returned") + if num_results < 2 and group["budget"]["max_fevals"] > 2: + raise ValueError(f"Less than two configurations were returned ({len(results['results'])}) \n") return metadata, results def tune_with_BAT(): @@ -240,8 +240,9 @@ def collect_results( raise ValueError(f"Unkown budget {budget}, can not calculate minimum fraction of budget valid") min_num_evals = max(round(minimum_fraction_of_budget_valid * min(max_fevals, searchspace_stats.size)), 2) if "minimum_number_of_valid_search_iterations" in group: + min_num_evals = min(min_num_evals, group["minimum_number_of_valid_search_iterations"]) warnings.warn( - f"Both 'minimum_number_of_valid_search_iterations' ({group['minimum_number_of_valid_search_iterations']}) and 'minimum_fraction_of_budget_valid' ({minimum_fraction_of_budget_valid}, {min_num_evals}) are set, the latter takes precedence." + f"Both 'minimum_number_of_valid_search_iterations' ({group['minimum_number_of_valid_search_iterations']}) and 'minimum_fraction_of_budget_valid' ({minimum_fraction_of_budget_valid}, {min_num_evals}) are set, the minimum ({min_num_evals}) is used." ) else: min_num_evals: int = group["minimum_number_of_valid_search_iterations"] diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index eb44c39..d4aed3c 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -265,7 +265,7 @@ def cutoff_point(self, cutoff_percentile: float) -> tuple[float, int]: Returns: A tuple of the objective value at the cutoff point and the fevals to the cutoff point. 
""" - inverted_sorted_performance_arr = self.objective_performances_total_sorted[::-1] + inverted_sorted_performance_arr = self.objective_performances_total_sorted[::-1] if self.minimization else self.objective_performances_total_sorted N = inverted_sorted_performance_arr.shape[0] # get the objective performance at the cutoff point @@ -294,9 +294,14 @@ def cutoff_point(self, cutoff_percentile: float) -> tuple[float, int]: # iterate over the inverted_sorted_performance_arr until we have # i = next(x[0] for x in enumerate(inverted_sorted_performance_arr) if x[1] > cutoff_percentile * arr[-1]) - i = next( - x[0] for x in enumerate(inverted_sorted_performance_arr) if x[1] <= objective_performance_at_cutoff_point - ) + if self.minimization: + i = next( + x[0] for x in enumerate(inverted_sorted_performance_arr) if x[1] <= objective_performance_at_cutoff_point + ) + else: + i = next( + x[0] for x in enumerate(inverted_sorted_performance_arr) if x[1] >= objective_performance_at_cutoff_point + ) if cutoff_percentile != 1.0 and inverted_sorted_performance_arr[i] == self.total_performance_absolute_optimum(): if i == 0: raise ValueError( From 86f67d657721e40209e338a606e3e9ed1d6ba6d7 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 13 May 2025 10:56:20 +0200 Subject: [PATCH 161/234] Switched to time-based cutoff point over function evaluations --- .../compare_hypertuners_paper.json | 6 +---- ...mpare_hypertuners_paper_heatmaps_left.json | 26 ++++++++++++------- ...pare_hypertuners_paper_heatmaps_right.json | 26 ++++++++++++------- .../experiments_defaults.json | 2 +- src/autotuning_methodology/runner.py | 6 ++--- 5 files changed, 39 insertions(+), 27 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 5f9210d..78106b3 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -222,15 +222,11 @@ } ], "statistics_settings": { - 
"minimization": true, "cutoff_percentile": 0.95, "cutoff_percentile_start": 0.5, - "cutoff_type": "fevals", + "cutoff_type": "time", "objective_time_keys": [ "all" - ], - "objective_performance_keys": [ - "time" ] }, "visualization_settings": { diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_left.json b/experiment_files/compare_hypertuners_paper_heatmaps_left.json index e747452..b28810b 100644 --- a/experiment_files/compare_hypertuners_paper_heatmaps_left.json +++ b/experiment_files/compare_hypertuners_paper_heatmaps_left.json @@ -7,22 +7,34 @@ { "name": "dedispersion_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "dedispersion_milo.json" + "input_file": "dedispersion_milo.json", + "objective_performance_keys": [ + "time" + ] }, { "name": "hotspot_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "hotspot_milo.json" + "input_file": "hotspot_milo.json", + "objective_performance_keys": [ + "GFLOP/s" + ] }, { "name": "convolution_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "convolution_milo.json" + "input_file": "convolution_milo.json", + "objective_performance_keys": [ + "time" + ] }, { "name": "gemm_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "gemm_milo.json" + "input_file": "gemm_milo.json", + "objective_performance_keys": [ + "time" + ] } ], "gpus": [ @@ -149,15 +161,11 @@ } ], "statistics_settings": { - "minimization": true, "cutoff_percentile": 0.95, "cutoff_percentile_start": 0.5, - "cutoff_type": "fevals", + "cutoff_type": "time", "objective_time_keys": [ "all" - ], - "objective_performance_keys": [ - "time" ] }, "visualization_settings": { diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_right.json b/experiment_files/compare_hypertuners_paper_heatmaps_right.json index 9871944..8ebd128 100644 --- a/experiment_files/compare_hypertuners_paper_heatmaps_right.json +++ 
b/experiment_files/compare_hypertuners_paper_heatmaps_right.json @@ -7,22 +7,34 @@ { "name": "dedispersion_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "dedispersion_milo.json" + "input_file": "dedispersion_milo.json", + "objective_performance_keys": [ + "time" + ] }, { "name": "hotspot_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "hotspot_milo.json" + "input_file": "hotspot_milo.json", + "objective_performance_keys": [ + "GFLOP/s" + ] }, { "name": "convolution_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "convolution_milo.json" + "input_file": "convolution_milo.json", + "objective_performance_keys": [ + "time" + ] }, { "name": "gemm_milo", "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "gemm_milo.json" + "input_file": "gemm_milo.json", + "objective_performance_keys": [ + "time" + ] } ], "gpus": [ @@ -149,15 +161,11 @@ } ], "statistics_settings": { - "minimization": true, "cutoff_percentile": 0.95, "cutoff_percentile_start": 0.5, - "cutoff_type": "fevals", + "cutoff_type": "time", "objective_time_keys": [ "all" - ], - "objective_performance_keys": [ - "time" ] }, "visualization_settings": { diff --git a/src/autotuning_methodology/experiments_defaults.json b/src/autotuning_methodology/experiments_defaults.json index e473573..05c783d 100644 --- a/src/autotuning_methodology/experiments_defaults.json +++ b/src/autotuning_methodology/experiments_defaults.json @@ -63,7 +63,7 @@ "statistics_settings": { "cutoff_percentile": 0.95, "cutoff_percentile_start": 0.5, - "cutoff_type": "fevals", + "cutoff_type": "time", "objective_time_keys": [ "all" ] diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 15afee9..81376c3 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -159,7 +159,7 @@ def tune_with_kerneltuner(): f"Much fewer configurations were 
returned ({num_results}) than the requested {max_fevals}" ) if num_results < 2 and group["budget"]["max_fevals"] > 2: - raise ValueError(f"Less than two configurations were returned ({len(results['results'])}) \n") + raise ValueError(f"Less than two configurations were returned ({len(results['results'])}, budget {group['budget']}) \n") return metadata, results def tune_with_BAT(): @@ -234,7 +234,7 @@ def collect_results( max_fevals = budget["max_fevals"] elif "time_limit" in budget: time_limit = budget["time_limit"] - time_per_feval = self.searchspace_stats.get_time_per_feval("mean_per_feval") + time_per_feval = searchspace_stats.get_time_per_feval("mean_per_feval") max_fevals = max(round(time_limit / time_per_feval), 2) else: raise ValueError(f"Unkown budget {budget}, can not calculate minimum fraction of budget valid") @@ -242,7 +242,7 @@ def collect_results( if "minimum_number_of_valid_search_iterations" in group: min_num_evals = min(min_num_evals, group["minimum_number_of_valid_search_iterations"]) warnings.warn( - f"Both 'minimum_number_of_valid_search_iterations' ({group['minimum_number_of_valid_search_iterations']}) and 'minimum_fraction_of_budget_valid' ({minimum_fraction_of_budget_valid}, {min_num_evals}) are set, the minimum ({min_num_evals}) is used." + f"Both 'minimum_number_of_valid_search_iterations' ({group['minimum_number_of_valid_search_iterations']}) and 'minimum_fraction_of_budget_valid' ({minimum_fraction_of_budget_valid}) are set, the minimum ({min_num_evals}) is used." 
) else: min_num_evals: int = group["minimum_number_of_valid_search_iterations"] From 93114b9c0be8a7f5843cda233f0d345d398f2bb7 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 13 May 2025 17:17:53 +0200 Subject: [PATCH 162/234] Improved handling of cutoff start time and defaults, improved error reporting --- benchmark_hub | 2 +- .../compare_hypertuners_paper.json | 2 +- ...mpare_hypertuners_paper_heatmaps_left.json | 2 +- ...pare_hypertuners_paper_heatmaps_right.json | 2 +- .../compare_hypertuners_paper_test.json | 194 ++++++++++++++++++ src/autotuning_methodology/experiments.py | 2 +- .../experiments_defaults.json | 2 +- src/autotuning_methodology/runner.py | 2 +- 8 files changed, 201 insertions(+), 7 deletions(-) create mode 100644 experiment_files/compare_hypertuners_paper_test.json diff --git a/benchmark_hub b/benchmark_hub index 65853fd..ed1d304 160000 --- a/benchmark_hub +++ b/benchmark_hub @@ -1 +1 @@ -Subproject commit 65853fd897b28bb32989db5746684b2881ce021a +Subproject commit ed1d304612a1ed1dc68b03b92bc2ffa6c3024187 diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 78106b3..bd28cdb 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -223,7 +223,7 @@ ], "statistics_settings": { "cutoff_percentile": 0.95, - "cutoff_percentile_start": 0.5, + "cutoff_percentile_start": 0.01, "cutoff_type": "time", "objective_time_keys": [ "all" diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_left.json b/experiment_files/compare_hypertuners_paper_heatmaps_left.json index b28810b..aa4210b 100644 --- a/experiment_files/compare_hypertuners_paper_heatmaps_left.json +++ b/experiment_files/compare_hypertuners_paper_heatmaps_left.json @@ -162,7 +162,7 @@ ], "statistics_settings": { "cutoff_percentile": 0.95, - "cutoff_percentile_start": 0.5, + "cutoff_percentile_start": 0.01, "cutoff_type": "time", "objective_time_keys": [ "all" 
diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_right.json b/experiment_files/compare_hypertuners_paper_heatmaps_right.json index 8ebd128..81e6f03 100644 --- a/experiment_files/compare_hypertuners_paper_heatmaps_right.json +++ b/experiment_files/compare_hypertuners_paper_heatmaps_right.json @@ -162,7 +162,7 @@ ], "statistics_settings": { "cutoff_percentile": 0.95, - "cutoff_percentile_start": 0.5, + "cutoff_percentile_start": 0.01, "cutoff_type": "time", "objective_time_keys": [ "all" diff --git a/experiment_files/compare_hypertuners_paper_test.json b/experiment_files/compare_hypertuners_paper_test.json new file mode 100644 index 0000000..c899bfa --- /dev/null +++ b/experiment_files/compare_hypertuners_paper_test.json @@ -0,0 +1,194 @@ +{ + "version": "1.2.0", + "name": "Compare hyperparameter tuning", + "parent_folder": "./hyperparametertuning_milo_new", + "experimental_groups_defaults": { + "applications": [ + { + "name": "dedispersion_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "dedispersion_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "hotspot_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "hotspot_milo.json", + "objective_performance_keys": [ + "GFLOP/s" + ] + }, + { + "name": "convolution_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "convolution_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "gemm_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "gemm_milo.json", + "objective_performance_keys": [ + "time" + ] + } + ], + "gpus": [ + "W6600", + "A6000", + "A100", + "A4000", + "MI250X", + "W7800" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 25, + "samples": 32, + "minimum_fraction_of_budget_valid": 0.1, + 
"minimum_number_of_valid_search_iterations": 50, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "diff_evo_tuned", + "search_method": "diff_evo", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 20 + }, + { + "name": "maxiter", + "value": 100 + }, + { + "name": "method", + "value": "best1bin" + } + ], + "display_name": "Differential Evolution tuned", + "autotuner": "KernelTuner" + }, + { + "name": "dual_annealing_tuned", + "search_method": "dual_annealing", + "search_method_hyperparameters": [ + { + "name": "method", + "value": "COBYLA" + } + ], + "display_name": "Dual Annealing tuned", + "autotuner": "KernelTuner" + }, + { + "name": "genetic_algorithm_tuned", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 30 + }, + { + "name": "maxiter", + "value": 50 + }, + { + "name": "method", + "value": "single_point" + }, + { + "name": "mutation_chance", + "value": 20 + } + ], + "display_name": "Genetic Algorithm tuned", + "autotuner": "KernelTuner" + }, + { + "name": "pso_tuned", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 30 + }, + { + "name": "maxiter", + "value": 100 + }, + { + "name": "c1", + "value": 3.0 + }, + { + "name": "c2", + "value": 0.5 + } + ], + "display_name": "PSO tuned", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "cutoff_percentile": 0.95, + "cutoff_percentile_start": 0.01, + "cutoff_type": "time", + "objective_time_keys": [ + "all" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + 
"time" + ], + "y_axis_value_types": [ + "searchspaces" + ], + "bins": 100 + }, + { + "scope": "aggregate", + "style": "line", + "vmin": "real" + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index f19017a..b5c3938 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -327,7 +327,7 @@ def calculate_budget(group: dict, statistics_settings: dict, searchspace_stats: # register in the group group["budget"] = {} group["cutoff_times"] = { - "cutoff_time_start": max(cutoff_point_start_time * (1 - cutoff_margin), 0.0), + "cutoff_time_start": max(cutoff_point_start_time, 0.0) if statistics_settings["cutoff_percentile_start"] > 0.0 else 0.0, "cutoff_time": cutoff_point_time * (1 + cutoff_margin), } diff --git a/src/autotuning_methodology/experiments_defaults.json b/src/autotuning_methodology/experiments_defaults.json index 05c783d..ca5a536 100644 --- a/src/autotuning_methodology/experiments_defaults.json +++ b/src/autotuning_methodology/experiments_defaults.json @@ -62,7 +62,7 @@ ], "statistics_settings": { "cutoff_percentile": 0.95, - "cutoff_percentile_start": 0.5, + "cutoff_percentile_start": 0.01, "cutoff_type": "time", "objective_time_keys": [ "all" diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 81376c3..c1f7640 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -306,7 +306,7 @@ def cumulative_time_taken(results: list) -> list: if attempt > 0: report_multiple_attempts(rep, len_res, group["repeats"], attempt) if attempt >= 20: - raise RuntimeError(f"Could not find enough results in {attempt} attempts, quiting...") + raise RuntimeError(f"Could not find enough results for {results_description.application_name} on 
{results_description.device_name} in {attempt} attempts ({'only invalid, ' if only_invalid else ''}{len_res}/{min_num_evals}), quiting...") _, results, total_time_ms = tune( input_file, results_description.application_name, From a7e786c2bbc0bbecec31d2c19e9b63bce19e49ee Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Wed, 14 May 2025 14:08:27 +0200 Subject: [PATCH 163/234] Implemented specification of custom labels --- .../experiments_defaults.json | 2 +- .../schemas/experiments.json | 8 +++++ .../visualize_experiments.py | 29 ++++++++++--------- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/src/autotuning_methodology/experiments_defaults.json b/src/autotuning_methodology/experiments_defaults.json index ca5a536..96cbf54 100644 --- a/src/autotuning_methodology/experiments_defaults.json +++ b/src/autotuning_methodology/experiments_defaults.json @@ -48,7 +48,7 @@ "stochastic": true, "repeats": 25, "samples": 32, - "minimum_fraction_of_budget_valid": 0.5, + "minimum_fraction_of_budget_valid": 0.25, "minimum_number_of_valid_search_iterations": 25, "ignore_cache": false }, diff --git a/src/autotuning_methodology/schemas/experiments.json b/src/autotuning_methodology/schemas/experiments.json index 96acffe..c963a9f 100755 --- a/src/autotuning_methodology/schemas/experiments.json +++ b/src/autotuning_methodology/schemas/experiments.json @@ -398,6 +398,14 @@ }, "minItems": 1, "uniqueItems": true + }, + "x_label": { + "description": "Override the default generated label for the x-axis.", + "type": "string" + }, + "y_label": { + "description": "Override the default generated label for the y-axis.", + "type": "string" } } } diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 58816d9..5cf7414 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -560,10 +560,10 @@ def norm_color_val(v): fig.suptitle(title) # plot the 
heatmap - axs[0].set_xlabel(label_data[x_type][1]) + axs[0].set_xlabel(plot.get("xlabel", label_data[x_type][1])) axs[0].set_xticks(ticks=np.arange(len(x_ticks)), labels=x_ticks, rotation=0) if include_y_labels is True or None: - axs[0].set_ylabel(label_data[y_type][1]) + axs[0].set_ylabel(plot.get("ylabel", label_data[y_type][1])) axs[0].set_yticks(ticks=np.arange(len(y_ticks)), labels=y_ticks) if include_y_labels is True: # axs[0].yaxis.set_label_position("right") @@ -785,7 +785,7 @@ def norm_color_val(v): # finalize the figure and save or display it lowest_real_y_value = self.plot_strategies_aggregated( - axs[0], aggregation_data, plot_settings=self.experiment["visualization_settings"] + axs[0], aggregation_data, visualization_settings=self.experiment["visualization_settings"], plot_settings=plot ) if vmin is not None: if isinstance(vmin, (int, float)): @@ -1297,14 +1297,16 @@ def plot_strategies_aggregated( self, ax: plt.Axes, aggregation_data, - plot_settings: dict, + visualization_settings: dict = {}, + plot_settings: dict = {}, ) -> float: """Plots all optimization strategies combined accross search spaces. Args: ax: the axis to plot on. aggregation_data: the aggregated data from the various searchspaces. - plot_settings: dictionary of additional plot settings. + visualization_settings: dictionary of additional visualization settings. + plot_settings: dictionary of additional visualization settings related to this particular plot. Returns: The lowest performance value of the real stopping point for all strategies. 
@@ -1314,7 +1316,7 @@ def plot_strategies_aggregated( ax.axhline(1, label="Absolute optimum", c="black", ls="-.") # get the relative aggregated performance for each strategy - confidence_level: float = plot_settings.get("confidence_level", 0.95) + confidence_level: float = visualization_settings.get("confidence_level", 0.95) ( strategies_performance, strategies_lower_err, @@ -1322,6 +1324,12 @@ def plot_strategies_aggregated( strategies_real_stopping_point_fraction, ) = get_strategies_aggregated_performance(list(aggregation_data.values()), confidence_level) + # get the relevant plot settings + cutoff_percentile: float = self.experiment["statistics_settings"].get("cutoff_percentile", 1) + cutoff_percentile_start: float = self.experiment["statistics_settings"].get("cutoff_percentile_start", 0.01) + xlabel = plot_settings.get("xlabel", f"{self.x_metric_displayname['aggregate_time']} ({cutoff_percentile_start*100}% to {cutoff_percentile*100}%)") # noqa: E501 + ylabel = plot_settings.get("ylabel", self.y_metric_displayname["aggregate_objective"]) + # plot each strategy y_axis_size = strategies_performance[0].shape[0] time_range = np.arange(y_axis_size) @@ -1397,13 +1405,8 @@ def plot_strategies_aggregated( print(f" | performance of {displayname}: {performance_score} (±{performance_score_std})") # set the axis - cutoff_percentile: float = self.experiment["statistics_settings"].get("cutoff_percentile", 1) - cutoff_percentile_start: float = self.experiment["statistics_settings"].get("cutoff_percentile_start", 0.01) - ax.set_xlabel( - f"{self.x_metric_displayname['aggregate_time']} ({cutoff_percentile_start*100}% to {cutoff_percentile*100}%)", # noqa: E501 - fontsize="large", - ) - ax.set_ylabel(self.y_metric_displayname["aggregate_objective"], fontsize="large") + ax.set_xlabel(xlabel, fontsize="large") + ax.set_ylabel(ylabel, fontsize="large") num_ticks = 11 ax.set_xticks( np.linspace(0, y_axis_size, num_ticks), From 487c12bc604f3c4202a8a823db3006edac656520 Mon Sep 17 
00:00:00 2001 From: fjwillemsen Date: Thu, 15 May 2025 10:31:59 +0200 Subject: [PATCH 164/234] Slight adjustments to evaluation --- .../compare_hypertuners_paper.json | 16 +- .../compare_hypertuners_paper_test.json | 194 ------------------ src/autotuning_methodology/runner.py | 2 +- 3 files changed, 15 insertions(+), 197 deletions(-) delete mode 100644 experiment_files/compare_hypertuners_paper_test.json diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index bd28cdb..3206262 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -52,7 +52,7 @@ "repeats": 100, "samples": 32, "minimum_fraction_of_budget_valid": 0.1, - "minimum_number_of_valid_search_iterations": 50, + "minimum_number_of_valid_search_iterations": 10, "ignore_cache": false }, "search_strategies": [ @@ -264,10 +264,22 @@ ], "bins": 100 }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ], + "divide_train_test_axis": "gpus", + "divide_train_test_after_num": 3 + }, { "scope": "aggregate", "style": "line", - "vmin": "real" + "ylabel": "Aggregate performance relative to baseline" } ], "resolution": 1000.0, diff --git a/experiment_files/compare_hypertuners_paper_test.json b/experiment_files/compare_hypertuners_paper_test.json deleted file mode 100644 index c899bfa..0000000 --- a/experiment_files/compare_hypertuners_paper_test.json +++ /dev/null @@ -1,194 +0,0 @@ -{ - "version": "1.2.0", - "name": "Compare hyperparameter tuning", - "parent_folder": "./hyperparametertuning_milo_new", - "experimental_groups_defaults": { - "applications": [ - { - "name": "dedispersion_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "dedispersion_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "hotspot_milo", - "folder": 
"../autotuning_methodology/benchmark_hub/kernels", - "input_file": "hotspot_milo.json", - "objective_performance_keys": [ - "GFLOP/s" - ] - }, - { - "name": "convolution_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "convolution_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "gemm_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "gemm_milo.json", - "objective_performance_keys": [ - "time" - ] - } - ], - "gpus": [ - "W6600", - "A6000", - "A100", - "A4000", - "MI250X", - "W7800" - ], - "pattern_for_full_search_space_filenames": { - "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" - }, - "stochastic": true, - "repeats": 25, - "samples": 32, - "minimum_fraction_of_budget_valid": 0.1, - "minimum_number_of_valid_search_iterations": 50, - "ignore_cache": false - }, - "search_strategies": [ - { - "name": "diff_evo_tuned", - "search_method": "diff_evo", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 20 - }, - { - "name": "maxiter", - "value": 100 - }, - { - "name": "method", - "value": "best1bin" - } - ], - "display_name": "Differential Evolution tuned", - "autotuner": "KernelTuner" - }, - { - "name": "dual_annealing_tuned", - "search_method": "dual_annealing", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "COBYLA" - } - ], - "display_name": "Dual Annealing tuned", - "autotuner": "KernelTuner" - }, - { - "name": "genetic_algorithm_tuned", - "search_method": "genetic_algorithm", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 30 - }, - { - "name": "maxiter", - "value": 50 - }, - { - "name": "method", - "value": "single_point" - }, - { - "name": "mutation_chance", - "value": 20 - } - ], - "display_name": "Genetic Algorithm tuned", - "autotuner": "KernelTuner" - }, - { - "name": "pso_tuned", - "search_method": "pso", - "search_method_hyperparameters": [ - { - "name": 
"popsize", - "value": 30 - }, - { - "name": "maxiter", - "value": 100 - }, - { - "name": "c1", - "value": 3.0 - }, - { - "name": "c2", - "value": 0.5 - } - ], - "display_name": "PSO tuned", - "autotuner": "KernelTuner" - } - ], - "statistics_settings": { - "cutoff_percentile": 0.95, - "cutoff_percentile_start": 0.01, - "cutoff_type": "time", - "objective_time_keys": [ - "all" - ] - }, - "visualization_settings": { - "plots": [ - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": [ - "fevals" - ], - "y_axis_value_types": [ - "normalized", - "baseline" - ] - }, - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": [ - "time" - ], - "y_axis_value_types": [ - "normalized", - "baseline" - ] - }, - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "time" - ], - "y_axis_value_types": [ - "searchspaces" - ], - "bins": 100 - }, - { - "scope": "aggregate", - "style": "line", - "vmin": "real" - } - ], - "resolution": 1000.0, - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - } -} \ No newline at end of file diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index c1f7640..9d31dd6 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -240,7 +240,7 @@ def collect_results( raise ValueError(f"Unkown budget {budget}, can not calculate minimum fraction of budget valid") min_num_evals = max(round(minimum_fraction_of_budget_valid * min(max_fevals, searchspace_stats.size)), 2) if "minimum_number_of_valid_search_iterations" in group: - min_num_evals = min(min_num_evals, group["minimum_number_of_valid_search_iterations"]) + min_num_evals = max(min(min_num_evals, group["minimum_number_of_valid_search_iterations"]), 2) warnings.warn( f"Both 'minimum_number_of_valid_search_iterations' ({group['minimum_number_of_valid_search_iterations']}) and 'minimum_fraction_of_budget_valid' 
({minimum_fraction_of_budget_valid}) are set, the minimum ({min_num_evals}) is used." ) From 2702a54a77f8634cb50f3aa0156cdb9a0a9681cd Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 15 May 2025 10:36:00 +0200 Subject: [PATCH 165/234] Moved hyperparameter tuning data to benchmark hub --- benchmark_hub | 2 +- .../dummy.cu | 0 .../metatuning_diff_evo.json | 62 ---------------- .../metatuning_dual_annealing.json | 50 ------------- .../metatuning_genetic_algorithm.json | 68 ----------------- .../metatuning_pso.json | 74 ------------------- .../metatuning_simulated_annealing.json | 68 ----------------- .../compare_hypertuners_metastrategy.json | 42 ++++++----- 8 files changed, 26 insertions(+), 340 deletions(-) delete mode 100644 cached_data_used/hyperparametertuning_metastrategies/dummy.cu delete mode 100644 cached_data_used/hyperparametertuning_metastrategies/metatuning_diff_evo.json delete mode 100644 cached_data_used/hyperparametertuning_metastrategies/metatuning_dual_annealing.json delete mode 100644 cached_data_used/hyperparametertuning_metastrategies/metatuning_genetic_algorithm.json delete mode 100644 cached_data_used/hyperparametertuning_metastrategies/metatuning_pso.json delete mode 100644 cached_data_used/hyperparametertuning_metastrategies/metatuning_simulated_annealing.json diff --git a/benchmark_hub b/benchmark_hub index ed1d304..73c92be 160000 --- a/benchmark_hub +++ b/benchmark_hub @@ -1 +1 @@ -Subproject commit ed1d304612a1ed1dc68b03b92bc2ffa6c3024187 +Subproject commit 73c92bef91d37d688673c407e8afaeb3044fcd76 diff --git a/cached_data_used/hyperparametertuning_metastrategies/dummy.cu b/cached_data_used/hyperparametertuning_metastrategies/dummy.cu deleted file mode 100644 index e69de29..0000000 diff --git a/cached_data_used/hyperparametertuning_metastrategies/metatuning_diff_evo.json b/cached_data_used/hyperparametertuning_metastrategies/metatuning_diff_evo.json deleted file mode 100644 index 4a00666..0000000 --- 
a/cached_data_used/hyperparametertuning_metastrategies/metatuning_diff_evo.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "General": { - "BenchmarkName": "hyperparamtuning_diff_evo", - "OutputFormat": "JSON" - }, - "ConfigurationSpace": { - "TuningParameters": [ - { - "Name": "method", - "Type": "string", - "Values": "['best1bin', 'best1exp', 'rand1exp', 'randtobest1exp', 'best2exp', 'rand2exp', 'randtobest1bin', 'best2bin', 'rand2bin', 'rand1bin']", - "Default": "randtobest1bin" - }, - { - "Name": "popsize", - "Type": "int", - "Values": "[10, 20, 30]", - "Default": 20 - }, - { - "Name": "maxiter", - "Type": "int", - "Values": "[50, 100, 150]", - "Default": 100 - } - ], - "Conditions": [] - }, - "KernelSpecification": { - "Language": "Hypertuner", - "BenchmarkName": "hyperparamtuning_diff_evo", - "KernelName": "hyperparamtuning_diff_evo", - "KernelFile": "dummy.cu", - "GlobalSizeType": "CUDA", - "LocalSize": { - "X": "block_size_x", - "Y": "block_size_y", - "Z": "1" - }, - "GlobalSize": { - "X": "(262144 // block_size_x) // tile_size_x", - "Y": "(262144 // block_size_y) // tile_size_y", - "Z": "1" - }, - "GridDivX": [ - "block_size_x", - "tile_size_x" - ], - "GridDivY": [ - "block_size_y", - "tile_size_y" - ], - "ProblemSize": [ - 25000, - 2048, - 1 - ], - "SharedMemory": 0, - "Stream": null, - "Arguments": [] - } -} \ No newline at end of file diff --git a/cached_data_used/hyperparametertuning_metastrategies/metatuning_dual_annealing.json b/cached_data_used/hyperparametertuning_metastrategies/metatuning_dual_annealing.json deleted file mode 100644 index 79006d0..0000000 --- a/cached_data_used/hyperparametertuning_metastrategies/metatuning_dual_annealing.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "General": { - "BenchmarkName": "hyperparamtuning_dual_annealing", - "OutputFormat": "JSON" - }, - "ConfigurationSpace": { - "TuningParameters": [ - { - "Name": "method", - "Type": "string", - "Values": "['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 
'trust-constr']", - "Default": "COBYLA" - } - ], - "Conditions": [] - }, - "KernelSpecification": { - "Language": "Hypertuner", - "BenchmarkName": "hyperparamtuning_dual_annealing", - "KernelName": "hyperparamtuning_dual_annealing", - "KernelFile": "dummy.cu", - "GlobalSizeType": "CUDA", - "LocalSize": { - "X": "block_size_x", - "Y": "block_size_y", - "Z": "1" - }, - "GlobalSize": { - "X": "(262144 // block_size_x) // tile_size_x", - "Y": "(262144 // block_size_y) // tile_size_y", - "Z": "1" - }, - "GridDivX": [ - "block_size_x", - "tile_size_x" - ], - "GridDivY": [ - "block_size_y", - "tile_size_y" - ], - "ProblemSize": [ - 25000, - 2048, - 1 - ], - "SharedMemory": 0, - "Stream": null, - "Arguments": [] - } -} \ No newline at end of file diff --git a/cached_data_used/hyperparametertuning_metastrategies/metatuning_genetic_algorithm.json b/cached_data_used/hyperparametertuning_metastrategies/metatuning_genetic_algorithm.json deleted file mode 100644 index 9cf4adf..0000000 --- a/cached_data_used/hyperparametertuning_metastrategies/metatuning_genetic_algorithm.json +++ /dev/null @@ -1,68 +0,0 @@ -{ - "General": { - "BenchmarkName": "hyperparamtuning_genetic_algorithm", - "OutputFormat": "JSON" - }, - "ConfigurationSpace": { - "TuningParameters": [ - { - "Name": "method", - "Type": "string", - "Values": "['single_point', 'two_point', 'uniform', 'disruptive_uniform']", - "Default": "uniform" - }, - { - "Name": "popsize", - "Type": "int", - "Values": "[10, 20, 30]", - "Default": 20 - }, - { - "Name": "maxiter", - "Type": "int", - "Values": "[50, 100, 150]", - "Default": 100 - }, - { - "Name": "mutation_chance", - "Type": "int", - "Values": "[5, 10, 20]", - "Default": 10 - } - ], - "Conditions": [] - }, - "KernelSpecification": { - "Language": "Hypertuner", - "BenchmarkName": "hyperparamtuning_genetic_algorithm", - "KernelName": "hyperparamtuning_genetic_algorithm", - "KernelFile": "dummy.cu", - "GlobalSizeType": "CUDA", - "LocalSize": { - "X": "block_size_x", - "Y": 
"block_size_y", - "Z": "1" - }, - "GlobalSize": { - "X": "(262144 // block_size_x) // tile_size_x", - "Y": "(262144 // block_size_y) // tile_size_y", - "Z": "1" - }, - "GridDivX": [ - "block_size_x", - "tile_size_x" - ], - "GridDivY": [ - "block_size_y", - "tile_size_y" - ], - "ProblemSize": [ - 25000, - 2048, - 1 - ], - "SharedMemory": 0, - "Stream": null, - "Arguments": [] - } -} \ No newline at end of file diff --git a/cached_data_used/hyperparametertuning_metastrategies/metatuning_pso.json b/cached_data_used/hyperparametertuning_metastrategies/metatuning_pso.json deleted file mode 100644 index 19889bd..0000000 --- a/cached_data_used/hyperparametertuning_metastrategies/metatuning_pso.json +++ /dev/null @@ -1,74 +0,0 @@ -{ - "General": { - "BenchmarkName": "hyperparamtuning_psog", - "OutputFormat": "JSON" - }, - "ConfigurationSpace": { - "TuningParameters": [ - { - "Name": "popsize", - "Type": "int", - "Values": "[10, 20, 30]", - "Default": 20 - }, - { - "Name": "popsize", - "Type": "int", - "Values": "[10, 20, 30]", - "Default": 20 - }, - { - "Name": "maxiter", - "Type": "int", - "Values": "[50, 100, 150]", - "Default": 100 - }, - { - "Name": "c1", - "Type": "float", - "Values": "[1.0, 2.0, 3.0]", - "Default": 2.0 - }, - { - "Name": "c2", - "Type": "float", - "Values": "[0.5, 1.0, 1.5]", - "Default": 1.0 - } - ], - "Conditions": [] - }, - "KernelSpecification": { - "Language": "Hypertuner", - "BenchmarkName": "hyperparamtuning_pso", - "KernelName": "hyperparamtuning_pso", - "KernelFile": "dummy.cu", - "GlobalSizeType": "CUDA", - "LocalSize": { - "X": "block_size_x", - "Y": "block_size_y", - "Z": "1" - }, - "GlobalSize": { - "X": "(262144 // block_size_x) // tile_size_x", - "Y": "(262144 // block_size_y) // tile_size_y", - "Z": "1" - }, - "GridDivX": [ - "block_size_x", - "tile_size_x" - ], - "GridDivY": [ - "block_size_y", - "tile_size_y" - ], - "ProblemSize": [ - 25000, - 2048, - 1 - ], - "SharedMemory": 0, - "Stream": null, - "Arguments": [] - } -} \ No 
newline at end of file diff --git a/cached_data_used/hyperparametertuning_metastrategies/metatuning_simulated_annealing.json b/cached_data_used/hyperparametertuning_metastrategies/metatuning_simulated_annealing.json deleted file mode 100644 index 2692ac9..0000000 --- a/cached_data_used/hyperparametertuning_metastrategies/metatuning_simulated_annealing.json +++ /dev/null @@ -1,68 +0,0 @@ -{ - "General": { - "BenchmarkName": "hyperparamtuning_dual_annealing", - "OutputFormat": "JSON" - }, - "ConfigurationSpace": { - "TuningParameters": [ - { - "Name": "T", - "Type": "int", - "Values": "[0.5, 1.0, 1.5]", - "Default": 1.0 - }, - { - "Name": "T_min", - "Type": "int", - "Values": "[0.0001, 0.001, 0.01]", - "Default": 0.001 - }, - { - "Name": "alpha", - "Type": "int", - "Values": "[0.9925, 0.995, 0.9975]", - "Default": 0.995 - }, - { - "Name": "maxiter", - "Type": "int", - "Values": "[1, 2, 3]", - "Default": 2 - } - ], - "Conditions": [] - }, - "KernelSpecification": { - "Language": "Hypertuner", - "BenchmarkName": "hyperparamtuning_simulated_annealing", - "KernelName": "hyperparamtuning_simulated_annealing", - "KernelFile": "dummy.cu", - "GlobalSizeType": "CUDA", - "LocalSize": { - "X": "block_size_x", - "Y": "block_size_y", - "Z": "1" - }, - "GlobalSize": { - "X": "(262144 // block_size_x) // tile_size_x", - "Y": "(262144 // block_size_y) // tile_size_y", - "Z": "1" - }, - "GridDivX": [ - "block_size_x", - "tile_size_x" - ], - "GridDivY": [ - "block_size_y", - "tile_size_y" - ], - "ProblemSize": [ - 25000, - 2048, - 1 - ], - "SharedMemory": 0, - "Stream": null, - "Arguments": [] - } -} \ No newline at end of file diff --git a/experiment_files/compare_hypertuners_metastrategy.json b/experiment_files/compare_hypertuners_metastrategy.json index aa0395c..6e00226 100644 --- a/experiment_files/compare_hypertuners_metastrategy.json +++ b/experiment_files/compare_hypertuners_metastrategy.json @@ -1,35 +1,47 @@ { - "version": "1.1.0", + "version": "1.2.0", "name": "Compare 
hyperparameter metastrategies", "parent_folder": "./hyperparametertuning_metastrategies", "experimental_groups_defaults": { "applications": [ { "name": "diff_evo", - "folder": "../autotuning_methodology/cached_data_used/hyperparametertuning_metastrategies", - "input_file": "metatuning_diff_evo.json" + "folder": "../autotuning_methodology/hyperparametertuning/benchmark_hub/algorithms", + "input_file": "metatuning_diff_evo.json", + "objective_performance_keys": [ + "score" + ] }, { "name": "pso", - "folder": "../autotuning_methodology/cached_data_used/hyperparametertuning_metastrategies", - "input_file": "metatuning_pso.json" + "folder": "../autotuning_methodology/hyperparametertuning/benchmark_hub/algorithms", + "input_file": "metatuning_pso.json", + "objective_performance_keys": [ + "score" + ] }, { "name": "genetic_algorithm", - "folder": "../autotuning_methodology/cached_data_used/hyperparametertuning_metastrategies", - "input_file": "metatuning_genetic_algorithm.json" + "folder": "../autotuning_methodology/hyperparametertuning/benchmark_hub/algorithms", + "input_file": "metatuning_genetic_algorithm.json", + "objective_performance_keys": [ + "score" + ] }, { "name": "simulated_annealing", - "folder": "../autotuning_methodology/cached_data_used/hyperparametertuning_metastrategies", - "input_file": "metatuning_simulated_annealing.json" + "folder": "../autotuning_methodology/hyperparametertuning/benchmark_hub/algorithms", + "input_file": "metatuning_simulated_annealing.json", + "objective_performance_keys": [ + "score" + ] } ], "gpus": [ "A4000" ], "pattern_for_full_search_space_filenames": { - "regex": "/Users/fjwillemsen/Downloads/new_0.95_10x50x/hyperparamtuning_paper_bruteforce_${applications}_C_T4.json" + "regex": "./benchmark_hub/hyperparametertuning/cachefiles/${applications}_T4.json" }, "stochastic": true, "repeats": 50, @@ -70,17 +82,13 @@ } ], "statistics_settings": { - "minimization": true, - "cutoff_percentile": 0.99, - "cutoff_percentile_start": 0.5, - 
"cutoff_type": "fevals", + "cutoff_percentile": 0.95, + "cutoff_percentile_start": 0.01, + "cutoff_type": "time", "objective_time_keys": [ "compilation", "framework", "search_algorithm" - ], - "objective_performance_keys": [ - "score" ] }, "visualization_settings": { From 068b65d3415265ee16e9665a7eb8ccf0225fb5df Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 15 May 2025 17:52:36 +0200 Subject: [PATCH 166/234] Improved visualization to plot only real values on time axis --- benchmark_hub | 2 +- .../compare_hypertuners_metastrategy.json | 53 ++++++++----------- src/autotuning_methodology/runner.py | 2 +- .../schemas/experiments.json | 8 +++ .../searchspace_statistics.py | 2 +- .../visualize_experiments.py | 52 +++++++++++++----- 6 files changed, 72 insertions(+), 47 deletions(-) diff --git a/benchmark_hub b/benchmark_hub index 73c92be..a1da35a 160000 --- a/benchmark_hub +++ b/benchmark_hub @@ -1 +1 @@ -Subproject commit 73c92bef91d37d688673c407e8afaeb3044fcd76 +Subproject commit a1da35a9850a8fd4e23789e4ac326ae7aa7e8f74 diff --git a/experiment_files/compare_hypertuners_metastrategy.json b/experiment_files/compare_hypertuners_metastrategy.json index 6e00226..72cb621 100644 --- a/experiment_files/compare_hypertuners_metastrategy.json +++ b/experiment_files/compare_hypertuners_metastrategy.json @@ -6,7 +6,7 @@ "applications": [ { "name": "diff_evo", - "folder": "../autotuning_methodology/hyperparametertuning/benchmark_hub/algorithms", + "folder": "../autotuning_methodology/benchmark_hub/hyperparametertuning/algorithms", "input_file": "metatuning_diff_evo.json", "objective_performance_keys": [ "score" @@ -14,7 +14,7 @@ }, { "name": "pso", - "folder": "../autotuning_methodology/hyperparametertuning/benchmark_hub/algorithms", + "folder": "../autotuning_methodology/benchmark_hub/hyperparametertuning/algorithms", "input_file": "metatuning_pso.json", "objective_performance_keys": [ "score" @@ -22,7 +22,7 @@ }, { "name": "genetic_algorithm", - "folder": 
"../autotuning_methodology/hyperparametertuning/benchmark_hub/algorithms", + "folder": "../autotuning_methodology/benchmark_hub/hyperparametertuning/algorithms", "input_file": "metatuning_genetic_algorithm.json", "objective_performance_keys": [ "score" @@ -30,7 +30,7 @@ }, { "name": "simulated_annealing", - "folder": "../autotuning_methodology/hyperparametertuning/benchmark_hub/algorithms", + "folder": "../autotuning_methodology/benchmark_hub/hyperparametertuning/algorithms", "input_file": "metatuning_simulated_annealing.json", "objective_performance_keys": [ "score" @@ -41,15 +41,22 @@ "A4000" ], "pattern_for_full_search_space_filenames": { - "regex": "./benchmark_hub/hyperparametertuning/cachefiles/${applications}_T4.json" + "regex": "./benchmark_hub/hyperparametertuning/cachefiles/hyperparamtuning_paper_bruteforce_${applications}_T4.json" }, "stochastic": true, - "repeats": 50, - "samples": 32, + "repeats": 10, + "samples": 1, + "minimum_fraction_of_budget_valid": 0.1, "minimum_number_of_valid_search_iterations": 2, "ignore_cache": false }, "search_strategies": [ + { + "name": "meta_genetic_algorithm", + "search_method": "genetic_algorithm", + "display_name": "Genetic Algorithm", + "autotuner": "KernelTuner" + }, { "name": "meta_diff_evo", "search_method": "diff_evo", @@ -68,12 +75,6 @@ "display_name": "PSO", "autotuner": "KernelTuner" }, - { - "name": "meta_genetic_algorithm", - "search_method": "genetic_algorithm", - "display_name": "Genetic Algorithm", - "autotuner": "KernelTuner" - }, { "name": "meta_simulated_annealing", "search_method": "simulated_annealing", @@ -82,34 +83,24 @@ } ], "statistics_settings": { - "cutoff_percentile": 0.95, - "cutoff_percentile_start": 0.01, + "cutoff_percentile": 0.001, + "cutoff_percentile_start": 0.0, "cutoff_type": "time", "objective_time_keys": [ - "compilation", - "framework", - "search_algorithm" + "all" ] }, "visualization_settings": { "plots": [ - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": 
[ - "fevals" - ], - "y_axis_value_types": [ - "normalized", - "baseline" - ] - }, { "scope": "aggregate", - "style": "line" + "style": "line", + "xlabel": "Relative time until the last optimization algorithm stopped", + "ylabel": "Aggregate performance relative to baseline", + "tmin": "real" } ], - "resolution": 1000.0, + "resolution": 100000.0, "confidence_level": 0.95, "compare_baselines": false, "compare_split_times": false diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 9d31dd6..589c0c0 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -242,7 +242,7 @@ def collect_results( if "minimum_number_of_valid_search_iterations" in group: min_num_evals = max(min(min_num_evals, group["minimum_number_of_valid_search_iterations"]), 2) warnings.warn( - f"Both 'minimum_number_of_valid_search_iterations' ({group['minimum_number_of_valid_search_iterations']}) and 'minimum_fraction_of_budget_valid' ({minimum_fraction_of_budget_valid}) are set, the minimum ({min_num_evals}) is used." + f"Both 'minimum_number_of_valid_search_iterations' ({group['minimum_number_of_valid_search_iterations']}) and 'minimum_fraction_of_budget_valid' ({minimum_fraction_of_budget_valid}/{max_fevals=}) are set, the minimum ({min_num_evals}) is used." 
) else: min_num_evals: int = group["minimum_number_of_valid_search_iterations"] diff --git a/src/autotuning_methodology/schemas/experiments.json b/src/autotuning_methodology/schemas/experiments.json index c963a9f..740cf5d 100755 --- a/src/autotuning_methodology/schemas/experiments.json +++ b/src/autotuning_methodology/schemas/experiments.json @@ -323,6 +323,14 @@ "heatmap" ] }, + "tmin": { + "description": "The minimum value on the time axis of the aggregate plot.", + "type": [ + "number", + "string" + ], + "default": 1.0 + }, "vmin": { "description": "The minimum value on the heatmap color scale or aggregate plot.", "type": [ diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index d4aed3c..fa014df 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -372,7 +372,7 @@ def cutoff_point_fevals_time_start_end( raise ValueError("Cutoff point start and end are the same") # get the times - cutoff_point_time_start = self.cutoff_point_time_from_fevals(cutoff_point_fevals_start) + cutoff_point_time_start = self.cutoff_point_time_from_fevals(cutoff_point_fevals_start if cutoff_percentile_start > 0.0 else 0) cutoff_point_time_end = self.cutoff_point_time_from_fevals(cutoff_point_fevals_end) # return the values diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 5cf7414..8663143 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -100,10 +100,14 @@ class Visualize: "time_total": "Total time in seconds", "aggregate_time": "Relative time to cutoff point", "time_partial_framework_time": "framework time", + "time_partial_framework": "framework time", "time_partial_strategy_time": "strategy time", + "time_partial_search_algorithm": "strategy time", "time_partial_compile_time": "compile time", + 
"time_partial_compilation": "compile time", "time_partial_benchmark_time": "kernel runtime", "time_partial_times": "kernel runtime", + "time_partial_runtimes": "kernel runtime", "time_partial_verification_time": "verification time", } ) @@ -1329,21 +1333,46 @@ def plot_strategies_aggregated( cutoff_percentile_start: float = self.experiment["statistics_settings"].get("cutoff_percentile_start", 0.01) xlabel = plot_settings.get("xlabel", f"{self.x_metric_displayname['aggregate_time']} ({cutoff_percentile_start*100}% to {cutoff_percentile*100}%)") # noqa: E501 ylabel = plot_settings.get("ylabel", self.y_metric_displayname["aggregate_objective"]) + tmin = plot_settings.get("tmin", 1.0) - # plot each strategy + # setup the plot y_axis_size = strategies_performance[0].shape[0] time_range = np.arange(y_axis_size) plot_errors = True lowest_real_y_value = 0.0 print("\n-------") print("Quantification of aggregate performance across all search spaces:") + + # get the highest real_stopping_point_index, adjust y_axis_size and time_range if necessary + real_stopping_point_indices = [min(round(strategies_real_stopping_point_fraction[strategy_index] * time_range.shape[0]) + 1, time_range.shape[0]) for strategy_index in range(len(strategies_performance))] # noqa: E501 + real_stopping_point_index_max = max(real_stopping_point_indices) + if tmin == "real": + # stop the time at the largest real stopping point + y_axis_size = min(real_stopping_point_index_max, y_axis_size) + time_range = np.arange(y_axis_size) + elif tmin < 1.0: + # stop the time at the given tmin + y_axis_size = y_axis_size * tmin + time_range = np.arange(y_axis_size) + elif tmin > 1.0: + raise ValueError(f"Invalid {tmin=}, must be between 0.0 and 1.0 or 'real'") + + # adjust the xlabel if necessary + if tmin != 1.0 and not "xlabel" in plot_settings: + xlabel = f"{self.x_metric_displayname['aggregate_time']} ({cutoff_percentile_start*100}% to {cutoff_percentile*100}%)" + + # plot each strategy for strategy_index, 
strategy_performance in enumerate(strategies_performance): if self.strategies[strategy_index]["name"] in self.plot_skip_strategies: continue displayname = self.strategies[strategy_index]["display_name"] color = self.colors[strategy_index] - real_stopping_point_fraction = strategies_real_stopping_point_fraction[strategy_index] - real_stopping_point_index = round(real_stopping_point_fraction * time_range.shape[0]) + real_stopping_point_index = real_stopping_point_indices[strategy_index] + if real_stopping_point_index <= 1: + warnings.warn(f"Stopping point index for {displayname} is at {real_stopping_point_index}") + continue + + # calculate the lowest real_y_value lowest_real_y_value = min( lowest_real_y_value, ( @@ -1353,9 +1382,6 @@ def plot_strategies_aggregated( ), ) assert isinstance(lowest_real_y_value, (int, float)), f"Invalid {lowest_real_y_value=}" - if real_stopping_point_index <= 0: - warnings.warn(f"Stopping point index for {displayname} is at {real_stopping_point_index}") - continue # plot the errors if plot_errors: @@ -1374,9 +1400,9 @@ def plot_strategies_aggregated( and real_stopping_point_index < len(strategy_lower_err) - 1 ): ax.fill_between( - time_range[real_stopping_point_index:], - strategy_lower_err[real_stopping_point_index:], - strategy_upper_err[real_stopping_point_index:], + time_range[real_stopping_point_index-1:y_axis_size], + strategy_lower_err[real_stopping_point_index-1:y_axis_size], + strategy_upper_err[real_stopping_point_index-1:y_axis_size], alpha=0.15, antialiased=True, color=color, @@ -1395,8 +1421,8 @@ def plot_strategies_aggregated( and real_stopping_point_index < len(strategy_performance) - 1 ): ax.plot( - time_range[real_stopping_point_index:], - strategy_performance[real_stopping_point_index:], + time_range[real_stopping_point_index-1:y_axis_size], + strategy_performance[real_stopping_point_index-1:y_axis_size], color=color, ls="dashed", ) @@ -1404,7 +1430,7 @@ def plot_strategies_aggregated( performance_score_std = 
round(np.std(strategy_performance), 3) print(f" | performance of {displayname}: {performance_score} (±{performance_score_std})") - # set the axis + # set the axis labels and ticks ax.set_xlabel(xlabel, fontsize="large") ax.set_ylabel(ylabel, fontsize="large") num_ticks = 11 @@ -1413,7 +1439,7 @@ def plot_strategies_aggregated( np.round(np.linspace(0, 1, num_ticks), 2), ) ax.set_ylim(top=1.02) - ax.set_xlim((0, y_axis_size)) + ax.set_xlim((0, y_axis_size-1)) ax.legend() return lowest_real_y_value From 1805ddf14cb26fa50ecb8423aca75d0707ec7a69 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 15 May 2025 17:54:57 +0200 Subject: [PATCH 167/234] Improved handling of labels en ticks --- .../compare_hypertuners_metastrategy.json | 2 +- .../visualize_experiments.py | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/experiment_files/compare_hypertuners_metastrategy.json b/experiment_files/compare_hypertuners_metastrategy.json index 72cb621..1ea0ed2 100644 --- a/experiment_files/compare_hypertuners_metastrategy.json +++ b/experiment_files/compare_hypertuners_metastrategy.json @@ -44,7 +44,7 @@ "regex": "./benchmark_hub/hyperparametertuning/cachefiles/hyperparamtuning_paper_bruteforce_${applications}_T4.json" }, "stochastic": true, - "repeats": 10, + "repeats": 25, "samples": 1, "minimum_fraction_of_budget_valid": 0.1, "minimum_number_of_valid_search_iterations": 2, diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 8663143..5760bd2 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -1358,8 +1358,8 @@ def plot_strategies_aggregated( raise ValueError(f"Invalid {tmin=}, must be between 0.0 and 1.0 or 'real'") # adjust the xlabel if necessary - if tmin != 1.0 and not "xlabel" in plot_settings: - xlabel = f"{self.x_metric_displayname['aggregate_time']} ({cutoff_percentile_start*100}% to 
{cutoff_percentile*100}%)" + if tmin == "real" and not "xlabel" in plot_settings: + xlabel = "Relative time until the last strategy stopped" # plot each strategy for strategy_index, strategy_performance in enumerate(strategies_performance): @@ -1433,11 +1433,13 @@ def plot_strategies_aggregated( # set the axis labels and ticks ax.set_xlabel(xlabel, fontsize="large") ax.set_ylabel(ylabel, fontsize="large") - num_ticks = 11 - ax.set_xticks( - np.linspace(0, y_axis_size, num_ticks), - np.round(np.linspace(0, 1, num_ticks), 2), - ) + + if tmin != "real": + num_ticks = 11 + ax.set_xticks( + np.linspace(0, y_axis_size, num_ticks), + np.round(np.linspace(0, tmin, num_ticks), 2), + ) ax.set_ylim(top=1.02) ax.set_xlim((0, y_axis_size-1)) ax.legend() From 51f93e23de9553e3a580c4a3d95a99b94d396f9f Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 16 May 2025 00:49:05 +0200 Subject: [PATCH 168/234] Improved handling of labels en ticks --- experiment_files/compare_hypertuners_metastrategy.json | 8 ++++---- src/autotuning_methodology/visualize_experiments.py | 9 +++++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/experiment_files/compare_hypertuners_metastrategy.json b/experiment_files/compare_hypertuners_metastrategy.json index 1ea0ed2..4761856 100644 --- a/experiment_files/compare_hypertuners_metastrategy.json +++ b/experiment_files/compare_hypertuners_metastrategy.json @@ -44,7 +44,7 @@ "regex": "./benchmark_hub/hyperparametertuning/cachefiles/hyperparamtuning_paper_bruteforce_${applications}_T4.json" }, "stochastic": true, - "repeats": 25, + "repeats": 100, "samples": 1, "minimum_fraction_of_budget_valid": 0.1, "minimum_number_of_valid_search_iterations": 2, @@ -83,7 +83,7 @@ } ], "statistics_settings": { - "cutoff_percentile": 0.001, + "cutoff_percentile": 0.4, "cutoff_percentile_start": 0.0, "cutoff_type": "time", "objective_time_keys": [ @@ -95,12 +95,12 @@ { "scope": "aggregate", "style": "line", - "xlabel": "Relative time until the last 
optimization algorithm stopped", + "xlabel": "Relative time until the budget is exhausted", "ylabel": "Aggregate performance relative to baseline", "tmin": "real" } ], - "resolution": 100000.0, + "resolution": 1000000.0, "confidence_level": 0.95, "compare_baselines": false, "compare_split_times": false diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 5760bd2..d0d1517 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -1430,16 +1430,21 @@ def plot_strategies_aggregated( performance_score_std = round(np.std(strategy_performance), 3) print(f" | performance of {displayname}: {performance_score} (±{performance_score_std})") - # set the axis labels and ticks + # set the axis labels ax.set_xlabel(xlabel, fontsize="large") ax.set_ylabel(ylabel, fontsize="large") - if tmin != "real": + # set the ticks + if tmin == "real": + ax.set_xticks([], []) + else: num_ticks = 11 ax.set_xticks( np.linspace(0, y_axis_size, num_ticks), np.round(np.linspace(0, tmin, num_ticks), 2), ) + + # set the limits and legend ax.set_ylim(top=1.02) ax.set_xlim((0, y_axis_size-1)) ax.legend() From c4fff3aa066fd64c27fd783702103e86fec3ac7a Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 16 May 2025 18:58:20 +0200 Subject: [PATCH 169/234] Updated hyperparameters of PSO and simulated annealing --- .../compare_hypertuners_paper.json | 55 ++++++++++++++++++- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 3206262..9c51540 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -62,7 +62,7 @@ "search_method_hyperparameters": [ { "name": "popsize", - "value": 20 + "value": 30 }, { "name": "maxiter", @@ -90,7 +90,7 @@ }, { "name": "method", - "value": "best2exp" + "value": 
"best1bin" } ], "display_name": "Differential Evolution untuned", @@ -213,12 +213,61 @@ }, { "name": "c2", - "value": 1.0 + "value": 1.5 } ], "display_name": "PSO untuned", "autotuner": "KernelTuner", "color_parent": "pso_tuned" + }, + { + "name": "simulated_annealing_tuned", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "T", + "value": 0.5 + }, + { + "name": "T_min", + "value": 0.001 + }, + { + "name": "alpha", + "value": 0.9975 + }, + { + "name": "maxiter", + "value": 1 + } + ], + "display_name": "Simulated Annealing tuned", + "autotuner": "KernelTuner" + }, + { + "name": "simulated_annealing_inv_tuned", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "T", + "value": 1.0 + }, + { + "name": "T_min", + "value": 0.01 + }, + { + "name": "alpha", + "value": 0.9925 + }, + { + "name": "maxiter", + "value": 1 + } + ], + "display_name": "Simulated Annealing untuned", + "autotuner": "KernelTuner", + "color_parent": "simulated_annealing_tuned" } ], "statistics_settings": { From e1294d34cbf1e9a3a4a176f9082255c79e14e254 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sun, 18 May 2025 11:50:37 +0200 Subject: [PATCH 170/234] Updated hyperparameters of genetic algorithm --- experiment_files/compare_hypertuners_paper.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 9c51540..7316c5e 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -128,11 +128,11 @@ "search_method_hyperparameters": [ { "name": "popsize", - "value": 30 + "value": 20 }, { "name": "maxiter", - "value": 50 + "value": 150 }, { "name": "method", @@ -140,7 +140,7 @@ }, { "name": "mutation_chance", - "value": 20 + "value": 5 } ], "display_name": "Genetic Algorithm tuned", @@ -160,11 +160,11 @@ }, { "name": "method", - "value": 
"disruptive_uniform" + "value": "single_point" }, { "name": "mutation_chance", - "value": 20 + "value": 10 } ], "display_name": "Genetic Algorithm untuned", From 2c7fada57a68df94fd50066a753f51f817201fe9 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 19 May 2025 10:15:53 +0200 Subject: [PATCH 171/234] Updated hyperparameters of firefly algorithm --- .../compare_hypertuners_paper.json | 42 +++++++++++++------ 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 7316c5e..368ffbd 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -57,28 +57,36 @@ }, "search_strategies": [ { - "name": "diff_evo_tuned", - "search_method": "diff_evo", + "name": "firefly_tuned", + "search_method": "firefly_algorithm", "search_method_hyperparameters": [ { "name": "popsize", - "value": 30 + "value": 20 }, { "name": "maxiter", "value": 100 }, { - "name": "method", - "value": "best1bin" + "name": "B0", + "value": 1.0 + }, + { + "name": "gamma", + "value": 0.5 + }, + { + "name": "alpha", + "value": 0.2 } ], - "display_name": "Differential Evolution tuned", + "display_name": "Firefly tuned", "autotuner": "KernelTuner" }, { - "name": "diff_evo_inv_tuned", - "search_method": "diff_evo", + "name": "firefly_inv_tuned", + "search_method": "firefly_algorithm", "search_method_hyperparameters": [ { "name": "popsize", @@ -86,16 +94,24 @@ }, { "name": "maxiter", - "value": 50 + "value": 150 }, { - "name": "method", - "value": "best1bin" + "name": "B0", + "value": 1.5 + }, + { + "name": "gamma", + "value": 0.25 + }, + { + "name": "alpha", + "value": 0.1 } ], - "display_name": "Differential Evolution untuned", + "display_name": "Firefly untuned", "autotuner": "KernelTuner", - "color_parent": "diff_evo_tuned" + "color_parent": "firefly_tuned" }, { "name": "dual_annealing_tuned", From 54146ed52d94bb4cffe36fd7666f15b50e834fbb 
Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 19 May 2025 16:48:36 +0200 Subject: [PATCH 172/234] Minor changes to experiment files --- .../compare_hypertuners_paper.json | 61 +------------------ ...mpare_hypertuners_paper_heatmaps_left.json | 38 +++--------- ...pare_hypertuners_paper_heatmaps_right.json | 40 +++--------- 3 files changed, 21 insertions(+), 118 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 368ffbd..67e956a 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -38,11 +38,11 @@ } ], "gpus": [ - "W6600", - "A6000", "A100", "A4000", "MI250X", + "A6000", + "W6600", "W7800" ], "pattern_for_full_search_space_filenames": { @@ -56,63 +56,6 @@ "ignore_cache": false }, "search_strategies": [ - { - "name": "firefly_tuned", - "search_method": "firefly_algorithm", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 20 - }, - { - "name": "maxiter", - "value": 100 - }, - { - "name": "B0", - "value": 1.0 - }, - { - "name": "gamma", - "value": 0.5 - }, - { - "name": "alpha", - "value": 0.2 - } - ], - "display_name": "Firefly tuned", - "autotuner": "KernelTuner" - }, - { - "name": "firefly_inv_tuned", - "search_method": "firefly_algorithm", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 10 - }, - { - "name": "maxiter", - "value": 150 - }, - { - "name": "B0", - "value": 1.5 - }, - { - "name": "gamma", - "value": 0.25 - }, - { - "name": "alpha", - "value": 0.1 - } - ], - "display_name": "Firefly untuned", - "autotuner": "KernelTuner", - "color_parent": "firefly_tuned" - }, { "name": "dual_annealing_tuned", "search_method": "dual_annealing", diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_left.json b/experiment_files/compare_hypertuners_paper_heatmaps_left.json index aa4210b..900d63c 100644 --- 
a/experiment_files/compare_hypertuners_paper_heatmaps_left.json +++ b/experiment_files/compare_hypertuners_paper_heatmaps_left.json @@ -38,11 +38,11 @@ } ], "gpus": [ - "W6600", - "A6000", "A100", "A4000", "MI250X", + "A6000", + "W6600", "W7800" ], "pattern_for_full_search_space_filenames": { @@ -55,26 +55,6 @@ "ignore_cache": false }, "search_strategies": [ - { - "name": "diff_evo_inv_tuned", - "search_method": "diff_evo", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 10 - }, - { - "name": "maxiter", - "value": 50 - }, - { - "name": "method", - "value": "best2exp" - } - ], - "display_name": "Differential Evolution untuned", - "autotuner": "KernelTuner" - }, { "name": "dual_annealing_inv_tuned", "search_method": "dual_annealing", @@ -101,11 +81,11 @@ }, { "name": "method", - "value": "disruptive_uniform" + "value": "single_point" }, { "name": "mutation_chance", - "value": 20 + "value": 10 } ], "display_name": "Genetic Algorithm untuned", @@ -129,7 +109,7 @@ }, { "name": "c2", - "value": 1.0 + "value": 1.5 } ], "display_name": "PSO untuned", @@ -141,7 +121,7 @@ "search_method_hyperparameters": [ { "name": "T", - "value": 1.5 + "value": 1.0 }, { "name": "T_min", @@ -149,11 +129,11 @@ }, { "name": "alpha", - "value": 0.995 + "value": 0.9925 }, { "name": "maxiter", - "value": 3 + "value": 1 } ], "display_name": "Simulated Annealing untuned", @@ -179,7 +159,7 @@ "y_axis_value_types": [ "gpus" ], - "cmin": -4.0, + "cmin": -6.0, "include_y_labels": true, "include_colorbar": false, "divide_train_test_axis": "gpus", diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_right.json b/experiment_files/compare_hypertuners_paper_heatmaps_right.json index 81e6f03..59d94b9 100644 --- a/experiment_files/compare_hypertuners_paper_heatmaps_right.json +++ b/experiment_files/compare_hypertuners_paper_heatmaps_right.json @@ -38,11 +38,11 @@ } ], "gpus": [ - "W6600", - "A6000", "A100", "A4000", "MI250X", + "A6000", + "W6600", "W7800" ], 
"pattern_for_full_search_space_filenames": { @@ -55,26 +55,6 @@ "ignore_cache": false }, "search_strategies": [ - { - "name": "diff_evo_tuned", - "search_method": "diff_evo", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 20 - }, - { - "name": "maxiter", - "value": 100 - }, - { - "name": "method", - "value": "best1bin" - } - ], - "display_name": "Differential Evolution tuned", - "autotuner": "KernelTuner" - }, { "name": "dual_annealing_tuned", "search_method": "dual_annealing", @@ -93,11 +73,11 @@ "search_method_hyperparameters": [ { "name": "popsize", - "value": 30 + "value": 20 }, { "name": "maxiter", - "value": 50 + "value": 150 }, { "name": "method", @@ -105,7 +85,7 @@ }, { "name": "mutation_chance", - "value": 20 + "value": 5 } ], "display_name": "Genetic Algorithm tuned", @@ -141,15 +121,15 @@ "search_method_hyperparameters": [ { "name": "T", - "value": 1.5 + "value": 0.5 }, { "name": "T_min", - "value": 0.0001 + "value": 0.001 }, { "name": "alpha", - "value": 0.995 + "value": 0.9975 }, { "name": "maxiter", @@ -179,8 +159,8 @@ "y_axis_value_types": [ "gpus" ], - "cmin": -4.0, - "cnum": 6, + "cmin": -6.0, + "cnum": 8, "include_y_labels": false, "include_colorbar": true, "divide_train_test_axis": "gpus", From 3473f2f51a2ad61c8e0b9b37ecedc0499de52d08 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 20 May 2025 13:37:38 +0200 Subject: [PATCH 173/234] Added median comparison to experiments file --- .../compare_hypertuners_paper.json | 45 ++++++++++++------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 67e956a..1602618 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -68,6 +68,19 @@ "display_name": "Dual Annealing tuned", "autotuner": "KernelTuner" }, + { + "name": "dual_annealing_median", + "search_method": "dual_annealing", + 
"search_method_hyperparameters": [ + { + "name": "method", + "value": "SLSQP" + } + ], + "display_name": "Dual Annealing median", + "autotuner": "KernelTuner", + "color_parent": "dual_annealing_tuned" + }, { "name": "dual_annealing_inv_tuned", "search_method": "dual_annealing", @@ -106,27 +119,27 @@ "autotuner": "KernelTuner" }, { - "name": "genetic_algorithm_inv_tuned", + "name": "genetic_algorithm_median", "search_method": "genetic_algorithm", "search_method_hyperparameters": [ { "name": "popsize", - "value": 10 + "value": 20 }, { "name": "maxiter", - "value": 50 + "value": 150 }, { "name": "method", - "value": "single_point" + "value": "uniform" }, { "name": "mutation_chance", "value": 10 } ], - "display_name": "Genetic Algorithm untuned", + "display_name": "Genetic Algorithm median", "autotuner": "KernelTuner", "color_parent": "genetic_algorithm_tuned" }, @@ -155,27 +168,27 @@ "autotuner": "KernelTuner" }, { - "name": "pso_inv_tuned", + "name": "pso_median", "search_method": "pso", "search_method_hyperparameters": [ { "name": "popsize", - "value": 10 + "value": 20 }, { "name": "maxiter", - "value": 50 + "value": 100 }, { "name": "c1", - "value": 1.0 + "value": 2.0 }, { "name": "c2", - "value": 1.5 + "value": 0.5 } ], - "display_name": "PSO untuned", + "display_name": "PSO median", "autotuner": "KernelTuner", "color_parent": "pso_tuned" }, @@ -204,12 +217,12 @@ "autotuner": "KernelTuner" }, { - "name": "simulated_annealing_inv_tuned", + "name": "simulated_annealing_median", "search_method": "simulated_annealing", "search_method_hyperparameters": [ { "name": "T", - "value": 1.0 + "value": 1.5 }, { "name": "T_min", @@ -217,14 +230,14 @@ }, { "name": "alpha", - "value": 0.9925 + "value": 0.9975 }, { "name": "maxiter", - "value": 1 + "value": 2 } ], - "display_name": "Simulated Annealing untuned", + "display_name": "Simulated Annealing median", "autotuner": "KernelTuner", "color_parent": "simulated_annealing_tuned" } From 4a103a878033296c6cef90e683542cb29e215d93 Mon 
Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 20 May 2025 13:42:28 +0200 Subject: [PATCH 174/234] Added mean configurations to experiments file --- .../compare_hypertuners_paper.json | 49 +++++++------------ 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 1602618..88e7a82 100644 --- a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -69,28 +69,15 @@ "autotuner": "KernelTuner" }, { - "name": "dual_annealing_median", + "name": "dual_annealing_mean", "search_method": "dual_annealing", "search_method_hyperparameters": [ { "name": "method", - "value": "SLSQP" + "value": "trust-constr" } ], - "display_name": "Dual Annealing median", - "autotuner": "KernelTuner", - "color_parent": "dual_annealing_tuned" - }, - { - "name": "dual_annealing_inv_tuned", - "search_method": "dual_annealing", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "Nelder-Mead" - } - ], - "display_name": "Dual Annealing untuned", + "display_name": "Dual Annealing mean", "autotuner": "KernelTuner", "color_parent": "dual_annealing_tuned" }, @@ -119,7 +106,7 @@ "autotuner": "KernelTuner" }, { - "name": "genetic_algorithm_median", + "name": "genetic_algorithm_mean", "search_method": "genetic_algorithm", "search_method_hyperparameters": [ { @@ -128,18 +115,18 @@ }, { "name": "maxiter", - "value": 150 + "value": 50 }, { "name": "method", - "value": "uniform" + "value": "disruptive_uniform" }, { "name": "mutation_chance", - "value": 10 + "value": 20 } ], - "display_name": "Genetic Algorithm median", + "display_name": "Genetic Algorithm mean", "autotuner": "KernelTuner", "color_parent": "genetic_algorithm_tuned" }, @@ -168,7 +155,7 @@ "autotuner": "KernelTuner" }, { - "name": "pso_median", + "name": "pso_mean", "search_method": "pso", "search_method_hyperparameters": [ { @@ -177,18 +164,18 @@ }, { "name": 
"maxiter", - "value": 100 + "value": 50 }, { "name": "c1", - "value": 2.0 + "value": 1.0 }, { "name": "c2", - "value": 0.5 + "value": 1.0 } ], - "display_name": "PSO median", + "display_name": "PSO mean", "autotuner": "KernelTuner", "color_parent": "pso_tuned" }, @@ -217,27 +204,27 @@ "autotuner": "KernelTuner" }, { - "name": "simulated_annealing_median", + "name": "simulated_annealing_mean", "search_method": "simulated_annealing", "search_method_hyperparameters": [ { "name": "T", - "value": 1.5 + "value": 1.0 }, { "name": "T_min", - "value": 0.01 + "value": 0.001 }, { "name": "alpha", - "value": 0.9975 + "value": 0.995 }, { "name": "maxiter", "value": 2 } ], - "display_name": "Simulated Annealing median", + "display_name": "Simulated Annealing mean", "autotuner": "KernelTuner", "color_parent": "simulated_annealing_tuned" } From 9fa1e254a3e514b2234ec544e109df822a66a502 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sat, 24 May 2025 12:50:49 +0200 Subject: [PATCH 175/234] Updated hyperparametertuning cachefiles, added firefly --- benchmark_hub | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark_hub b/benchmark_hub index a1da35a..8eeb99e 160000 --- a/benchmark_hub +++ b/benchmark_hub @@ -1 +1 @@ -Subproject commit a1da35a9850a8fd4e23789e4ac326ae7aa7e8f74 +Subproject commit 8eeb99e7179e6838af95d392ac6c60cebfd8c434 From 8ff98f9d480a52a751add6bab07f7f9501d1fdfc Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sat, 24 May 2025 12:52:01 +0200 Subject: [PATCH 176/234] Changed hypertuning comparison applications --- .../compare_hypertuners_metastrategy.json | 32 ++++++------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/experiment_files/compare_hypertuners_metastrategy.json b/experiment_files/compare_hypertuners_metastrategy.json index 4761856..62e3923 100644 --- a/experiment_files/compare_hypertuners_metastrategy.json +++ b/experiment_files/compare_hypertuners_metastrategy.json @@ -5,9 +5,9 @@ "experimental_groups_defaults": 
{ "applications": [ { - "name": "diff_evo", + "name": "genetic_algorithm", "folder": "../autotuning_methodology/benchmark_hub/hyperparametertuning/algorithms", - "input_file": "metatuning_diff_evo.json", + "input_file": "metatuning_genetic_algorithm.json", "objective_performance_keys": [ "score" ] @@ -20,14 +20,6 @@ "score" ] }, - { - "name": "genetic_algorithm", - "folder": "../autotuning_methodology/benchmark_hub/hyperparametertuning/algorithms", - "input_file": "metatuning_genetic_algorithm.json", - "objective_performance_keys": [ - "score" - ] - }, { "name": "simulated_annealing", "folder": "../autotuning_methodology/benchmark_hub/hyperparametertuning/algorithms", @@ -51,24 +43,18 @@ "ignore_cache": false }, "search_strategies": [ - { - "name": "meta_genetic_algorithm", - "search_method": "genetic_algorithm", - "display_name": "Genetic Algorithm", - "autotuner": "KernelTuner" - }, - { - "name": "meta_diff_evo", - "search_method": "diff_evo", - "display_name": "Differential Evolution", - "autotuner": "KernelTuner" - }, { "name": "meta_dual_annealing", "search_method": "dual_annealing", "display_name": "Dual Annealing", "autotuner": "KernelTuner" }, + { + "name": "meta_genetic_algorithm", + "search_method": "genetic_algorithm", + "display_name": "Genetic Algorithm", + "autotuner": "KernelTuner" + }, { "name": "meta_pso", "search_method": "pso", @@ -83,7 +69,7 @@ } ], "statistics_settings": { - "cutoff_percentile": 0.4, + "cutoff_percentile": 0.95, "cutoff_percentile_start": 0.0, "cutoff_type": "time", "objective_time_keys": [ From 8e1f0d6423eae1ef6ca2db01b156c0b9d8112251 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sat, 24 May 2025 12:52:41 +0200 Subject: [PATCH 177/234] Updated names --- experiment_files/compare_hypertuners_paper.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json index 88e7a82..53d9e61 100644 --- 
a/experiment_files/compare_hypertuners_paper.json +++ b/experiment_files/compare_hypertuners_paper.json @@ -65,7 +65,7 @@ "value": "COBYLA" } ], - "display_name": "Dual Annealing tuned", + "display_name": "Dual Annealing optimal", "autotuner": "KernelTuner" }, { @@ -102,7 +102,7 @@ "value": 5 } ], - "display_name": "Genetic Algorithm tuned", + "display_name": "Genetic Algorithm optimal", "autotuner": "KernelTuner" }, { @@ -151,7 +151,7 @@ "value": 0.5 } ], - "display_name": "PSO tuned", + "display_name": "PSO optimal", "autotuner": "KernelTuner" }, { @@ -200,7 +200,7 @@ "value": 1 } ], - "display_name": "Simulated Annealing tuned", + "display_name": "Simulated Annealing optimal", "autotuner": "KernelTuner" }, { From 99ee9ba1fa9970ae1396a144057d62d896a4d522 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sat, 24 May 2025 12:53:07 +0200 Subject: [PATCH 178/234] Print on adjusted index for real vmin in aggregated plot --- src/autotuning_methodology/visualize_experiments.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index d0d1517..098ca6f 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -1348,7 +1348,9 @@ def plot_strategies_aggregated( real_stopping_point_index_max = max(real_stopping_point_indices) if tmin == "real": # stop the time at the largest real stopping point - y_axis_size = min(real_stopping_point_index_max, y_axis_size) + if real_stopping_point_index_max < y_axis_size: + y_axis_size = real_stopping_point_index_max + print(f" adjusted stopping point index: {real_stopping_point_index_max}/{y_axis_size}") time_range = np.arange(y_axis_size) elif tmin < 1.0: # stop the time at the given tmin From cd7277bda425fc8caa087695735924f95c530550 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 2 Jun 2025 07:38:45 +0200 Subject: [PATCH 179/234] Adjusted plot 
sizes --- src/autotuning_methodology/visualize_experiments.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 098ca6f..80330d1 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -777,7 +777,7 @@ def norm_color_val(v): # plot the aggregation if continue_after_comparison or not (compare_baselines or compare_split_times): fig, axs = plt.subplots( - ncols=1, figsize=(7.5, 5), dpi=300 + ncols=1, figsize=(7.5, 4.1), dpi=300 ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. if not hasattr(axs, "__len__"): axs = [axs] From 5c165285b3e5effe42e685f6bf7733439d74d4b3 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 2 Jun 2025 15:25:10 +0200 Subject: [PATCH 180/234] Implemented passing custom search method path --- src/autotuning_methodology/runner.py | 6 +++++- src/autotuning_methodology/schemas/experiments.json | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 589c0c0..0689370 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -139,6 +139,10 @@ def tune_with_kerneltuner(): from kernel_tuner import tune_kernel_T1 samples = group["samples"] + strategy_options = group.get("budget", {}) + if "custom_search_method_path" in group: + # if a custom search method is specified, use it + strategy_options["custom_search_method_path"] = group["custom_search_method_path"] with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -149,7 +153,7 @@ def tune_with_kerneltuner(): simulation_mode=True, output_T4=True, iterations=samples, - strategy_options=group["budget"], + strategy_options=strategy_options, ) if "max_fevals" in group["budget"]: max_fevals = group["budget"]["max_fevals"] diff --git 
a/src/autotuning_methodology/schemas/experiments.json b/src/autotuning_methodology/schemas/experiments.json index 740cf5d..8d81447 100755 --- a/src/autotuning_methodology/schemas/experiments.json +++ b/src/autotuning_methodology/schemas/experiments.json @@ -178,8 +178,12 @@ "description": "Path to the library of the autotuner", "type": "string" }, + "custom_search_method_path": { + "description": "Optional path to a custom search method implementation.", + "type": "string" + }, "search_method": { - "description": "Name of the search method as recognized by the autotuner", + "description": "Name of the search method as recognized by the autotuner, Class name if custom search method", "type": "string" }, "search_method_hyperparameters": { From eac2e2257edc7beab09b48777ce451d8587b21c4 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 12 Jun 2025 12:18:41 +0200 Subject: [PATCH 181/234] Added experiments file for comparing constrained to non-constrained strategies --- .../compare_constrained_strategies.json | 251 ++++++++++++++++++ 1 file changed, 251 insertions(+) create mode 100644 experiment_files/compare_constrained_strategies.json diff --git a/experiment_files/compare_constrained_strategies.json b/experiment_files/compare_constrained_strategies.json new file mode 100644 index 0000000..86482ef --- /dev/null +++ b/experiment_files/compare_constrained_strategies.json @@ -0,0 +1,251 @@ +{ + "version": "1.2.0", + "name": "Compare constrained strategies", + "parent_folder": "./constrained_optimization", + "experimental_groups_defaults": { + "applications": [ + { + "name": "dedispersion_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "dedispersion_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "hotspot_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "hotspot_milo.json", + "objective_performance_keys": [ + "GFLOP/s" + ] + }, + { + "name": "convolution_milo", + 
"folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "convolution_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "gemm_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "gemm_milo.json", + "objective_performance_keys": [ + "time" + ] + } + ], + "gpus": [ + "A100", + "A4000", + "MI250X", + "A6000", + "W6600", + "W7800" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 100, + "samples": 32, + "minimum_fraction_of_budget_valid": 0.1, + "minimum_number_of_valid_search_iterations": 10, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "firefly_constrained", + "search_method": "firefly_algorithm", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "Firefly constrained", + "autotuner": "KernelTuner" + }, + { + "name": "firefly_non_constrained", + "search_method": "firefly_algorithm", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": false + } + ], + "display_name": "Firefly non-constrained", + "autotuner": "KernelTuner", + "color_parent": "firefly_constrained" + }, + { + "name": "greedy_ils_constrained", + "search_method": "greedy_ils", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "Greedy ILS constrained", + "autotuner": "KernelTuner" + }, + { + "name": "greedy_ils_non_constrained", + "search_method": "greedy_ils", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": false + } + ], + "display_name": "Greedy ILS non-constrained", + "autotuner": "KernelTuner", + "color_parent": "greedy_ils_constrained" + }, + { + "name": "genetic_algorithm_constrained", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + 
"value": true + } + ], + "display_name": "Genetic Algorithm constrained", + "autotuner": "KernelTuner" + }, + { + "name": "genetic_algorithm_non_constrained", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": false + } + ], + "display_name": "Genetic Algorithm non-constrained", + "autotuner": "KernelTuner", + "color_parent": "genetic_algorithm_constrained" + }, + { + "name": "pso_constrained", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "PSO constrained", + "autotuner": "KernelTuner" + }, + { + "name": "pso_mean", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": false + } + ], + "display_name": "PSO non-constrained", + "autotuner": "KernelTuner", + "color_parent": "pso_constrained" + }, + { + "name": "simulated_annealing_constrained", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "Simulated Annealing constrained", + "autotuner": "KernelTuner" + }, + { + "name": "simulated_annealing_non_constrained", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": false + } + ], + "display_name": "Simulated Annealing non-constrained", + "autotuner": "KernelTuner", + "color_parent": "simulated_annealing_constrained" + } + ], + "statistics_settings": { + "cutoff_percentile": 0.95, + "cutoff_percentile_start": 0.01, + "cutoff_type": "time", + "objective_time_keys": [ + "all" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "time" + ], + 
"y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "searchspaces" + ], + "bins": 100 + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ], + "divide_train_test_axis": "gpus", + "divide_train_test_after_num": 3 + }, + { + "scope": "aggregate", + "style": "line", + "ylabel": "Aggregate performance relative to baseline" + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file From 870dad7e2cb4dbeeb9da6e3b14e14ba133060e1b Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 19 Jun 2025 21:13:36 +0200 Subject: [PATCH 182/234] Added pyATF strategies to comparison --- .../compare_constrained_strategies.json | 100 ++++++++------ experiment_files/nikki_compare.json | 122 ++++++++++++++++++ 2 files changed, 183 insertions(+), 39 deletions(-) create mode 100644 experiment_files/nikki_compare.json diff --git a/experiment_files/compare_constrained_strategies.json b/experiment_files/compare_constrained_strategies.json index 86482ef..4d6cf6a 100644 --- a/experiment_files/compare_constrained_strategies.json +++ b/experiment_files/compare_constrained_strategies.json @@ -68,44 +68,6 @@ "display_name": "Firefly constrained", "autotuner": "KernelTuner" }, - { - "name": "firefly_non_constrained", - "search_method": "firefly_algorithm", - "search_method_hyperparameters": [ - { - "name": "constraint_aware", - "value": false - } - ], - "display_name": "Firefly non-constrained", - "autotuner": "KernelTuner", - "color_parent": "firefly_constrained" - }, - { - "name": "greedy_ils_constrained", - "search_method": "greedy_ils", - "search_method_hyperparameters": [ - { - "name": "constraint_aware", - "value": true - } - ], - "display_name": "Greedy ILS constrained", - 
"autotuner": "KernelTuner" - }, - { - "name": "greedy_ils_non_constrained", - "search_method": "greedy_ils", - "search_method_hyperparameters": [ - { - "name": "constraint_aware", - "value": false - } - ], - "display_name": "Greedy ILS non-constrained", - "autotuner": "KernelTuner", - "color_parent": "greedy_ils_constrained" - }, { "name": "genetic_algorithm_constrained", "search_method": "genetic_algorithm", @@ -144,7 +106,7 @@ "autotuner": "KernelTuner" }, { - "name": "pso_mean", + "name": "pso_non_constrained", "search_method": "pso", "search_method_hyperparameters": [ { @@ -180,6 +142,66 @@ "display_name": "Simulated Annealing non-constrained", "autotuner": "KernelTuner", "color_parent": "simulated_annealing_constrained" + }, + { + "name": "pyatf_simulated_annealing", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "simulated_annealing" + } + ], + "display_name": "pyATF Simulated Annealing", + "autotuner": "KernelTuner" + }, + { + "name": "pyatf_auc_bandit", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "auc_bandit" + } + ], + "display_name": "pyATF AUC Bandit", + "autotuner": "KernelTuner" + }, + { + "name": "pyatf_differential_evolution", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "differential_evolution" + } + ], + "display_name": "pyATF Differential Evolution", + "autotuner": "KernelTuner" + }, + { + "name": "pyatf_round_robin", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "round_robin" + } + ], + "display_name": "pyATF Round Robin", + "autotuner": "KernelTuner" + }, + { + "name": "pyatf_torczon", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "torczon" + } + ], + "display_name": "pyATF 
Torczon", + "autotuner": "KernelTuner" } ], "statistics_settings": { diff --git a/experiment_files/nikki_compare.json b/experiment_files/nikki_compare.json new file mode 100644 index 0000000..4d58791 --- /dev/null +++ b/experiment_files/nikki_compare.json @@ -0,0 +1,122 @@ +{ + "version": "1.2.0", + "name": "Compare Nikki's LLMAEA algorithms", + "parent_folder": "./compare_nikki", + "experimental_groups_defaults": { + "applications": [ + { + "name": "dedispersion_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "dedispersion_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "hotspot_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "hotspot_milo.json", + "objective_performance_keys": [ + "GFLOP/s" + ] + }, + { + "name": "convolution_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "convolution_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "gemm_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "gemm_milo.json", + "objective_performance_keys": [ + "time" + ] + } + ], + "gpus": [ + "A100", + "A4000", + "MI250X", + "A6000", + "W6600", + "W7800" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 100, + "samples": 32, + "minimum_fraction_of_budget_valid": 0.1, + "minimum_number_of_valid_search_iterations": 10, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "nikki_good_alg_gemm", + "search_method": "AdaptiveSimulatedAnnealing", + "display_name": "Adaptive Simulated Annealing", + "autotuner": "KernelTuner", + "custom_search_method_path": "../nikki_good_alg_gemm.py" + }, + { + "name": "pso", + "search_method": "pso", + "display_name": "PSO", + "autotuner": "KernelTuner" + }, + { + "name": "simulated_annealing", + "search_method": 
"simulated_annealing", + "display_name": "Simulated Annealing", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "cutoff_percentile": 0.95, + "cutoff_percentile_start": 0.01, + "cutoff_type": "time", + "objective_time_keys": [ + "all" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "aggregate", + "style": "line", + "ylabel": "Aggregate performance relative to baseline" + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file From 1039d056b66880e2075337ad2ad95f1b25e06d70 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 24 Jun 2025 15:36:38 +0200 Subject: [PATCH 183/234] Introduced the 'cap to vmin' argument, allowing values under the heatmap vmin to be capped to the vmin (must be used with care) --- src/autotuning_methodology/caching.py | 2 +- src/autotuning_methodology/schemas/experiments.json | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/autotuning_methodology/caching.py b/src/autotuning_methodology/caching.py index bbb7d45..75dcb18 100755 --- a/src/autotuning_methodology/caching.py +++ b/src/autotuning_methodology/caching.py @@ -121,7 +121,7 @@ def is_same_as(self, other: ResultsDescription) -> bool: # check if same value for each key for attribute_key, attribute_value in self.__get_as_dict().items(): - if attribute_key == "group_display_name" or attribute_key == "visualization_caches_path": + if attribute_key == "group_display_name" or attribute_key == "visualization_caches_path" or attribute_key == "run_folder": continue else: assert ( diff --git 
a/src/autotuning_methodology/schemas/experiments.json b/src/autotuning_methodology/schemas/experiments.json index 8d81447..8d4b93c 100755 --- a/src/autotuning_methodology/schemas/experiments.json +++ b/src/autotuning_methodology/schemas/experiments.json @@ -364,6 +364,10 @@ "minimum": 2, "default": 5 }, + "cap_to_vmin": { + "description": "Whether to allow values below vmin to be capped at vmin in the heatmap (must be reported if applied).", + "type": "boolean" + }, "include_y_labels": { "description": "Whether to show the y-axis labels on the heatmap. Displayed if not set.", "type": "boolean" From 04de14b03647ef15fa4e30f0379caa61772c0aad Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Wed, 25 Jun 2025 11:33:34 +0200 Subject: [PATCH 184/234] New, more extensive color mapping and improved capping --- .../visualize_experiments.py | 72 ++++++++++++++++--- 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 80330d1..fe37283 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -9,7 +9,8 @@ import matplotlib.pyplot as plt import numpy as np from matplotlib.cm import get_cmap -from matplotlib.colors import LinearSegmentedColormap, rgb2hex +from matplotlib.colors import to_rgb, to_hex +# from matplotlib.colors import LinearSegmentedColormap, rgb2hex from autotuning_methodology.baseline import ( Baseline, @@ -37,13 +38,63 @@ objective_time_keys_values = ["compilation", "benchmark", "framework", "search_algorithm", "validation"] -def get_colors(strategies: list[dict], scale_margin_left=0.4, scale_margin_right=0.15): +def lighten_color(color, amount: float = 0.5): + """Lightens the given color by interpolating it toward white.""" + r, g, b = to_rgb(color) + return to_hex([(1 - amount) * c + amount for c in (r, g, b)]) + +def get_colors(strategies: list[dict]) -> list: + """Assign 
colors using the tab10 colormap, with lighter shades for children.""" + tab10 = plt.get_cmap("tab10").colors + max_parents = len(tab10) + strategy_parents = defaultdict(list) + + # Group children under their parents + for i, strategy in enumerate(strategies): + if "color_parent" in strategy: + strategy_parents[strategy["color_parent"]].append(i) + + if len(strategy_parents) > max_parents: + raise ValueError(f"Too many color parents: max supported is {max_parents} using tab10") + + parent_colors = {} + colors = [None] * len(strategies) + color_index = 0 + + for i, strategy in enumerate(strategies): + name = strategy["name"] + if name in strategy_parents: + children_indices = strategy_parents[name] + if len(children_indices) > 2: + raise ValueError(f"Color parent '{name}' has more than two children") + base_color = tab10[color_index] + parent_colors[name] = { + idx: lighten_color(base_color, amount=0.4 + 0.3 * j) + for j, idx in enumerate(children_indices) + } + colors[i] = to_hex(base_color) + color_index += 1 + elif "color_parent" in strategy: + parent = strategy["color_parent"] + colors[i] = parent_colors[parent][i] + else: + if color_index >= len(tab10): + raise ValueError("Too many unparented strategies for tab10 colormap") + colors[i] = to_hex(tab10[color_index]) + color_index += 1 + + return colors + + +def get_colors_old(strategies: list[dict], scale_margin_left=0.4, scale_margin_right=0.15) -> list: """Function to get the colors for each of the strategies.""" default_colors = plt.rcParams["axes.prop_cycle"].by_key()["color"] - main_colors = ["Blues", "Greens", "Reds", "Purples", "Greys"] + main_colors = ["Blues", "Greens", "Reds", "Purples", "Greys", "Oranges"] main_color_counter = 0 strategy_parents = defaultdict(list) + # TODO switch to qualitative colormaps, e.g. 
tab10 if no children, otherwise tab20 (https://matplotlib.org/stable/users/explain/colors/colormaps.html#qualitative) + # get the dictionary of parents with the index of their child strategies for strategy_index, strategy in enumerate(strategies): if "color_parent" in strategy: @@ -192,11 +243,12 @@ def __init__( compare_baselines: bool = self.experiment["visualization_settings"]["compare_baselines"] compare_split_times: bool = self.experiment["visualization_settings"]["compare_split_times"] confidence_level: float = self.experiment["visualization_settings"]["confidence_level"] - self.colors = get_colors( - self.strategies, - scale_margin_left=self.experiment["visualization_settings"].get("color_parent_scale_margin_left", 0.4), - scale_margin_right=self.experiment["visualization_settings"].get("color_parent_scale_margin_right", 0.1), - ) + self.colors = get_colors(self.strategies) + # self.colors = get_colors( + # self.strategies, + # scale_margin_left=self.experiment["visualization_settings"].get("color_parent_scale_margin_left", 0.4), + # scale_margin_right=self.experiment["visualization_settings"].get("color_parent_scale_margin_right", 0.1), + # ) self.plot_skip_strategies: list[str] = list() if use_strategy_as_baseline is not None: self.plot_skip_strategies.append(use_strategy_as_baseline) @@ -374,6 +426,7 @@ def __init__( cmin = plot.get("cmin", vmin) # colorbar lower limit cmax = plot.get("cmax", vmax) # colorbar upper limit cnum = plot.get("cnum", 5) # number of ticks on the colorbar + cap_to_vmin = plot.get("cap_to_vmin", False) # whether to cap the values to vmin divide_train_test_axis = plot.get( "divide_train_test_axis", False ) # whether to add visual indication for train/test split @@ -542,6 +595,9 @@ def norm_color_val(v): ) # validate the data is within the vmin-vmax range and visible colorbar range + assert not (plot_data > 1.0).any(), "Plot data contains values greater than 1.0, which should not be possible. Please investigate." 
+ if cap_to_vmin: + plot_data = np.clip(plot_data, vmin, 1.0) outside_range = np.where(np.logical_or(plot_data < vmin, plot_data > vmax)) assert ( len(outside_range[0]) == 0 and len(outside_range[1]) == 0 From f73345c7481218c672967b7f26066f5dc6bf6a7b Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 26 Jun 2025 09:07:51 +0200 Subject: [PATCH 185/234] Both sides of heatmaps can now be plotted in one experiments run --- src/autotuning_methodology/visualize_experiments.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index fe37283..2aa88e6 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -9,7 +9,7 @@ import matplotlib.pyplot as plt import numpy as np from matplotlib.cm import get_cmap -from matplotlib.colors import to_rgb, to_hex +from matplotlib.colors import to_rgb, to_hex, LinearSegmentedColormap # from matplotlib.colors import LinearSegmentedColormap, rgb2hex from autotuning_methodology.baseline import ( @@ -727,9 +727,14 @@ def norm_color_val(v): # finalize the figure and save or display it fig.tight_layout() if save_figs: + suffix = "" + if include_colorbar: + suffix += "_colorbar" + if include_y_labels: + suffix += "_ylabels" filename_path = ( Path(self.plot_filename_prefix) - / f"{strategy_name}_heatmap_{'_'.join(plot_x_value_types)}_{'_'.join(plot_y_value_types)}" + / f"{strategy_name}_heatmap_{'_'.join(plot_x_value_types)}_{'_'.join(plot_y_value_types)}{suffix}" ) fig.savefig(filename_path, dpi=300, bbox_inches="tight", pad_inches=0.01) print(f"Figure saved to {filename_path}") From df49cb129698c7bb1bcd0b66e1b8c915b3583577 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 26 Jun 2025 09:08:28 +0200 Subject: [PATCH 186/234] Experiment files for Constrained Optimization paper are now split into three parts --- ...=> 
compare_constrained_strategies_kt.json} | 94 ++---- ...re_constrained_strategies_pyatf_cache.json | 279 ++++++++++++++++++ ...mpare_constrained_strategies_pyatf_kt.json | 240 +++++++++++++++ 3 files changed, 544 insertions(+), 69 deletions(-) rename experiment_files/{compare_constrained_strategies.json => compare_constrained_strategies_kt.json} (75%) create mode 100644 experiment_files/compare_constrained_strategies_pyatf_cache.json create mode 100644 experiment_files/compare_constrained_strategies_pyatf_kt.json diff --git a/experiment_files/compare_constrained_strategies.json b/experiment_files/compare_constrained_strategies_kt.json similarity index 75% rename from experiment_files/compare_constrained_strategies.json rename to experiment_files/compare_constrained_strategies_kt.json index 4d6cf6a..12fe630 100644 --- a/experiment_files/compare_constrained_strategies.json +++ b/experiment_files/compare_constrained_strategies_kt.json @@ -1,7 +1,7 @@ { "version": "1.2.0", - "name": "Compare constrained strategies", - "parent_folder": "./constrained_optimization", + "name": "Compare constrained strategies Kernel Tuner", + "parent_folder": "/var/scratch/fjwillem/constrained_optimization", "experimental_groups_defaults": { "applications": [ { @@ -68,6 +68,18 @@ "display_name": "Firefly constrained", "autotuner": "KernelTuner" }, + { + "name": "firefly_constrained_non_constrained", + "search_method": "firefly_algorithm", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": false + } + ], + "display_name": "Firefly constrained (non-constrained)", + "autotuner": "KernelTuner" + }, { "name": "genetic_algorithm_constrained", "search_method": "genetic_algorithm", @@ -142,66 +154,6 @@ "display_name": "Simulated Annealing non-constrained", "autotuner": "KernelTuner", "color_parent": "simulated_annealing_constrained" - }, - { - "name": "pyatf_simulated_annealing", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": 
"searchtechnique", - "value": "simulated_annealing" - } - ], - "display_name": "pyATF Simulated Annealing", - "autotuner": "KernelTuner" - }, - { - "name": "pyatf_auc_bandit", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "auc_bandit" - } - ], - "display_name": "pyATF AUC Bandit", - "autotuner": "KernelTuner" - }, - { - "name": "pyatf_differential_evolution", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "differential_evolution" - } - ], - "display_name": "pyATF Differential Evolution", - "autotuner": "KernelTuner" - }, - { - "name": "pyatf_round_robin", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "round_robin" - } - ], - "display_name": "pyATF Round Robin", - "autotuner": "KernelTuner" - }, - { - "name": "pyatf_torczon", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "torczon" - } - ], - "display_name": "pyATF Torczon", - "autotuner": "KernelTuner" } ], "statistics_settings": { @@ -221,7 +173,7 @@ "fevals" ], "y_axis_value_types": [ - "normalized", + "absolute", "baseline" ] }, @@ -232,7 +184,7 @@ "time" ], "y_axis_value_types": [ - "normalized", + "absolute", "baseline" ] }, @@ -240,12 +192,14 @@ "scope": "search_strategy", "style": "heatmap", "x_axis_value_types": [ - "time" + "applications" ], "y_axis_value_types": [ - "searchspaces" + "gpus" ], - "bins": 100 + "cmin": -8.0, + "include_y_labels": true, + "include_colorbar": false }, { "scope": "search_strategy", @@ -256,8 +210,10 @@ "y_axis_value_types": [ "gpus" ], - "divide_train_test_axis": "gpus", - "divide_train_test_after_num": 3 + "cmin": -8.0, + "cnum": 10, + "include_y_labels": false, + "include_colorbar": true }, { "scope": "aggregate", diff --git 
a/experiment_files/compare_constrained_strategies_pyatf_cache.json b/experiment_files/compare_constrained_strategies_pyatf_cache.json new file mode 100644 index 0000000..09f2e27 --- /dev/null +++ b/experiment_files/compare_constrained_strategies_pyatf_cache.json @@ -0,0 +1,279 @@ +{ + "version": "1.2.0", + "name": "Compare constrained strategies pyATF cached vs non-cached", + "parent_folder": "/var/scratch/fjwillem/constrained_optimization", + "experimental_groups_defaults": { + "applications": [ + { + "name": "dedispersion_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "dedispersion_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "hotspot_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "hotspot_milo.json", + "objective_performance_keys": [ + "GFLOP/s" + ] + }, + { + "name": "convolution_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "convolution_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "gemm_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "gemm_milo.json", + "objective_performance_keys": [ + "time" + ] + } + ], + "gpus": [ + "A100", + "A4000", + "MI250X", + "A6000", + "W6600", + "W7800" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 100, + "samples": 32, + "minimum_fraction_of_budget_valid": 0.1, + "minimum_number_of_valid_search_iterations": 10, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "pyatf_simulated_annealing", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "simulated_annealing" + }, + { + "name": "use_searchspace_cache", + "value": true + } + ], + "display_name": "pyATF Simulated Annealing", + "autotuner": "KernelTuner" + }, + { 
+ "name": "pyatf_simulated_annealing_no_cache", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "simulated_annealing" + }, + { + "name": "use_searchspace_cache", + "value": false + } + ], + "display_name": "pyATF Simulated Annealing (no cache)", + "autotuner": "KernelTuner", + "color_parent": "pyatf_simulated_annealing" + }, + { + "name": "pyatf_auc_bandit", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "auc_bandit" + }, + { + "name": "use_searchspace_cache", + "value": true + } + ], + "display_name": "pyATF AUC Bandit", + "autotuner": "KernelTuner" + }, + { + "name": "pyatf_auc_bandit_no_cache", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "auc_bandit" + }, + { + "name": "use_searchspace_cache", + "value": false + } + ], + "display_name": "pyATF AUC Bandit (no cache)", + "autotuner": "KernelTuner", + "color_parent": "pyatf_auc_bandit" + }, + { + "name": "pyatf_differential_evolution", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "differential_evolution" + }, + { + "name": "use_searchspace_cache", + "value": true + } + ], + "display_name": "pyATF Differential Evolution", + "autotuner": "KernelTuner" + }, + { + "name": "pyatf_differential_evolution_no_cache", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "differential_evolution" + }, + { + "name": "use_searchspace_cache", + "value": false + } + ], + "display_name": "pyATF Differential Evolution (no cache)", + "autotuner": "KernelTuner", + "color_parent": "pyatf_differential_evolution" + }, + { + "name": "pyatf_pattern_search", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": 
"pattern_search" + }, + { + "name": "use_searchspace_cache", + "value": true + } + ], + "display_name": "pyATF Pattern Search", + "autotuner": "KernelTuner" + }, + { + "name": "pyatf_pattern_search_no_cache", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "pattern_search" + }, + { + "name": "use_searchspace_cache", + "value": false + } + ], + "display_name": "pyATF Pattern Search (no cache)", + "autotuner": "KernelTuner", + "color_parent": "pyatf_pattern_search" + }, + { + "name": "pyatf_round_robin", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "round_robin" + }, + { + "name": "use_searchspace_cache", + "value": true + } + ], + "display_name": "pyATF Round Robin", + "autotuner": "KernelTuner" + }, + { + "name": "pyatf_round_robin_no_cache", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "round_robin" + }, + { + "name": "use_searchspace_cache", + "value": false + } + ], + "display_name": "pyATF Round Robin (no cache)", + "autotuner": "KernelTuner", + "color_parent": "pyatf_round_robin" + }, + { + "name": "pyatf_torczon", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "torczon" + }, + { + "name": "use_searchspace_cache", + "value": true + } + ], + "display_name": "pyATF Torczon", + "autotuner": "KernelTuner" + }, + { + "name": "pyatf_torczon_no_cache", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "torczon" + }, + { + "name": "use_searchspace_cache", + "value": false + } + ], + "display_name": "pyATF Torczon (no cache)", + "autotuner": "KernelTuner", + "color_parent": "pyatf_torczon" + } + ], + "statistics_settings": { + "cutoff_percentile": 0.95, + "cutoff_percentile_start": 0.01, + "cutoff_type": 
"time", + "objective_time_keys": [ + "all" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "aggregate", + "style": "line", + "ylabel": "Aggregate performance relative to baseline" + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file diff --git a/experiment_files/compare_constrained_strategies_pyatf_kt.json b/experiment_files/compare_constrained_strategies_pyatf_kt.json new file mode 100644 index 0000000..9cfd49e --- /dev/null +++ b/experiment_files/compare_constrained_strategies_pyatf_kt.json @@ -0,0 +1,240 @@ +{ + "version": "1.2.0", + "name": "Compare constrained strategies pyATF vs KT", + "parent_folder": "/var/scratch/fjwillem/constrained_optimization", + "experimental_groups_defaults": { + "applications": [ + { + "name": "dedispersion_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "dedispersion_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "hotspot_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "hotspot_milo.json", + "objective_performance_keys": [ + "GFLOP/s" + ] + }, + { + "name": "convolution_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "convolution_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "gemm_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "gemm_milo.json", + "objective_performance_keys": [ + "time" + ] + } + ], + "gpus": [ + "A100", + "A4000", + "MI250X", + "A6000", + "W6600", + "W7800" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 100, + "samples": 32, + "minimum_fraction_of_budget_valid": 0.1, + "minimum_number_of_valid_search_iterations": 10, + "ignore_cache": false + }, + 
"search_strategies": [ + { + "name": "genetic_algorithm_constrained", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "KT Genetic Algorithm", + "autotuner": "KernelTuner" + }, + { + "name": "pso_constrained", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "KT PSO", + "autotuner": "KernelTuner" + }, + { + "name": "simulated_annealing_constrained", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "KT Simulated Annealing", + "autotuner": "KernelTuner" + }, + { + "name": "pyatf_simulated_annealing", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "simulated_annealing" + }, + { + "name": "use_searchspace_cache", + "value": true + } + ], + "display_name": "pyATF Simulated Annealing", + "autotuner": "KernelTuner" + }, + { + "name": "pyatf_auc_bandit", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "auc_bandit" + }, + { + "name": "use_searchspace_cache", + "value": true + } + ], + "display_name": "pyATF AUC Bandit", + "autotuner": "KernelTuner" + }, + { + "name": "pyatf_differential_evolution", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "differential_evolution" + }, + { + "name": "use_searchspace_cache", + "value": true + } + ], + "display_name": "pyATF Differential Evolution", + "autotuner": "KernelTuner" + }, + { + "name": "pyatf_pattern_search", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "pattern_search" + }, + { + "name": "use_searchspace_cache", + "value": true + } + ], + 
"display_name": "pyATF Pattern Search", + "autotuner": "KernelTuner" + }, + { + "name": "pyatf_round_robin", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "round_robin" + }, + { + "name": "use_searchspace_cache", + "value": true + } + ], + "display_name": "pyATF Round Robin", + "autotuner": "KernelTuner" + }, + { + "name": "pyatf_torczon", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "torczon" + }, + { + "name": "use_searchspace_cache", + "value": true + } + ], + "display_name": "pyATF Torczon", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "cutoff_percentile": 0.95, + "cutoff_percentile_start": 0.01, + "cutoff_type": "time", + "objective_time_keys": [ + "all" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ], + "cmin": -8.0, + "include_y_labels": true, + "include_colorbar": false + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ], + "cmin": -8.0, + "cnum": 10, + "include_y_labels": false, + "include_colorbar": true + }, + { + "scope": "aggregate", + "style": "line", + "ylabel": "Aggregate performance relative to baseline" + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file From cb2c114e024734d54f11ba73812457cd312783ec Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 26 Jun 2025 23:00:50 +0200 Subject: [PATCH 187/234] Improved aggregate figure size to fit y-label --- src/autotuning_methodology/visualize_experiments.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/autotuning_methodology/visualize_experiments.py 
b/src/autotuning_methodology/visualize_experiments.py index 2aa88e6..805160b 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -838,7 +838,7 @@ def norm_color_val(v): # plot the aggregation if continue_after_comparison or not (compare_baselines or compare_split_times): fig, axs = plt.subplots( - ncols=1, figsize=(7.5, 4.1), dpi=300 + ncols=1, figsize=(7.5, 4.4), dpi=300 ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. if not hasattr(axs, "__len__"): axs = [axs] From cc1195ef9cb813cd3d6f5fb7a9eddb6feb843f1e Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 27 Jun 2025 01:26:41 +0200 Subject: [PATCH 188/234] Implemented the optional parameter, allowing curves to be returned fused instead of split on real / fictional --- src/autotuning_methodology/baseline.py | 12 ++++---- src/autotuning_methodology/curves.py | 39 +++++++++++++++----------- 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/src/autotuning_methodology/baseline.py b/src/autotuning_methodology/baseline.py index 051a328..5c7b33f 100644 --- a/src/autotuning_methodology/baseline.py +++ b/src/autotuning_methodology/baseline.py @@ -178,13 +178,13 @@ def get_curve(self, range: np.ndarray, x_type: str, dist=None, confidence_level= return super().get_curve(range, x_type, dist, confidence_level) def get_curve_over_fevals( # noqa: D102 - self, fevals_range: np.ndarray, dist=None, confidence_level=None + self, fevals_range: np.ndarray, dist=None, confidence_level=None, return_split=False ) -> np.ndarray: if self.simulate: return self._get_random_curve_means(fevals_range) return self._get_random_curve(fevals_range) - def get_curve_over_time(self, time_range: np.ndarray, dist=None, confidence_level=None) -> np.ndarray: # noqa: D102 + def get_curve_over_time(self, time_range: np.ndarray, dist=None, confidence_level=None, return_split=False) -> np.ndarray: # noqa: D102 fevals_range = 
self.time_to_fevals(time_range) curve_over_time = self.get_curve_over_fevals(fevals_range, dist, confidence_level) smoothing_factor = 0.0 @@ -327,7 +327,7 @@ def get_curve(self, range: np.ndarray, x_type: str, dist=None, confidence_level= return super().get_curve(range, x_type, dist, confidence_level) def get_curve_over_fevals( # noqa: D102 - self, fevals_range: np.ndarray, dist=None, confidence_level=None + self, fevals_range: np.ndarray, dist=None, confidence_level=None, return_split=False ) -> np.ndarray: if self.use_index: return self.searchspace_stats.objective_performances_total_sorted_nan[ @@ -337,7 +337,7 @@ def get_curve_over_fevals( # noqa: D102 assert self.y_array.ndim == 1 return self.y_array[fevals_range] - def get_curve_over_time(self, time_range: np.ndarray, dist=None, confidence_level=None) -> np.ndarray: # noqa: D102 + def get_curve_over_time(self, time_range: np.ndarray, dist=None, confidence_level=None, return_split=False) -> np.ndarray: # noqa: D102 predicted_y_values = self._ir.predict(time_range) if not self.use_index: return predicted_y_values @@ -416,7 +416,7 @@ def get_curve(self, range: np.ndarray, x_type: str, dist=None, confidence_level= return self.stochastic_curve_to_deterministic(range=range, curve=stochastic_curve) def get_curve_over_fevals( # noqa: D102 - self, fevals_range: np.ndarray, dist=None, confidence_level=None + self, fevals_range: np.ndarray, dist=None, confidence_level=None, return_split=False ) -> np.ndarray: if dist is None: dist = self.searchspace_stats.objective_performances_total_sorted @@ -427,7 +427,7 @@ def get_curve_over_fevals( # noqa: D102 ) return self.stochastic_curve_to_deterministic(range=fevals_range, curve=stochastic_curve) - def get_curve_over_time(self, time_range: np.ndarray, dist=None, confidence_level=None) -> np.ndarray: # noqa: D102 + def get_curve_over_time(self, time_range: np.ndarray, dist=None, confidence_level=None, return_split=False) -> np.ndarray: # noqa: D102 if dist is None: dist = 
self.searchspace_stats.objective_performances_total_sorted if confidence_level is None: diff --git a/src/autotuning_methodology/curves.py b/src/autotuning_methodology/curves.py index 79df865..e636fc2 100644 --- a/src/autotuning_methodology/curves.py +++ b/src/autotuning_methodology/curves.py @@ -116,7 +116,7 @@ class CurveBasis(ABC): """Abstract object providing minimals for visualization and analysis. Implemented by ``Curve`` and ``Baseline``.""" @abstractmethod - def get_curve(self, range: np.ndarray, x_type: str, dist: np.ndarray = None, confidence_level: float = None): + def get_curve(self, range: np.ndarray, x_type: str, dist: np.ndarray = None, confidence_level: float = None, return_split: bool = True): """Get the curve over the specified range of time or function evaluations. Args: @@ -124,6 +124,7 @@ def get_curve(self, range: np.ndarray, x_type: str, dist: np.ndarray = None, con x_type: the type of the x-axis range (either time or function evaluations). dist: the distribution, used for looking up indices. Ignored in ``Baseline``. Defaults to None. confidence_level: confidence level for the confidence interval. Ignored in ``Baseline``. Defaults to None. + return_split: whether to return the arrays split at the real / fictional point. Defaults to True. Raises: ValueError: on invalid ``x_type`` argument. @@ -133,19 +134,20 @@ def get_curve(self, range: np.ndarray, x_type: str, dist: np.ndarray = None, con See ``get_curve_over_fevals()`` and ``get_curve_over_time()`` for more precise return values. 
""" if x_type == "fevals": - return self.get_curve_over_fevals(range, dist, confidence_level) + return self.get_curve_over_fevals(range, dist, confidence_level, return_split=return_split) elif x_type == "time": - return self.get_curve_over_time(range, dist, confidence_level) + return self.get_curve_over_time(range, dist, confidence_level, return_split=return_split) raise ValueError(f"x_type must be 'fevals' or 'time', is {x_type}") @abstractmethod - def get_curve_over_fevals(self, fevals_range: np.ndarray, dist: np.ndarray = None, confidence_level: float = None): + def get_curve_over_fevals(self, fevals_range: np.ndarray, dist: np.ndarray = None, confidence_level: float = None, return_split: bool = True): """Get the curve over function evaluations. Args: fevals_range: the range of function evaluations. dist: the distribution, used for looking up indices. Ignored in ``Baseline``. Defaults to None. confidence_level: confidence level for the confidence interval. Ignored in ``Baseline``. Defaults to None. + return_split: whether to return the arrays split at the real / fictional point. Defaults to True. Returns: Two possible returns, for ``Baseline`` and ``Curve`` respectively: @@ -155,13 +157,14 @@ def get_curve_over_fevals(self, fevals_range: np.ndarray, dist: np.ndarray = Non raise NotImplementedError @abstractmethod - def get_curve_over_time(self, time_range: np.ndarray, dist: np.ndarray = None, confidence_level: float = None): + def get_curve_over_time(self, time_range: np.ndarray, dist: np.ndarray = None, confidence_level: float = None, return_split: bool = True): """Get the curve over time. Args: time_range: the range of time. dist: the distribution, used for looking up indices. Ignored in ``Baseline``. Defaults to None. confidence_level: confidence level for the confidence interval. Ignored in ``Baseline``. Defaults to None. + return_split: whether to return the arrays split at the real / fictional point. Defaults to True. 
Returns: Two possible returns, for ``Baseline`` and ``Curve`` respectively: @@ -537,9 +540,9 @@ def _check_curve_real_fictional_consistency( ), f"Unequal arrays: {curve_upper_err}, {curve_upper_err_real}" def get_curve( # noqa: D102 - self, range: np.ndarray, x_type: str, dist: np.ndarray = None, confidence_level: float = None + self, range: np.ndarray, x_type: str, dist: np.ndarray = None, confidence_level: float = None, return_split: bool = True ): - return super().get_curve(range, x_type, dist, confidence_level) + return super().get_curve(range, x_type, dist, confidence_level, return_split=return_split) def _get_matching_feval_indices_in_range(self, fevals_range: np.ndarray) -> np.ndarray: """Get a mask of where the fevals range matches with the data.""" @@ -615,7 +618,7 @@ def _get_curve_over_fevals_values_in_range(self, fevals_range: np.ndarray) -> tu return fevals, masked_values def get_curve_over_fevals( # noqa: D102 - self, fevals_range: np.ndarray, dist: np.ndarray = None, confidence_level: float = None + self, fevals_range: np.ndarray, dist: np.ndarray = None, confidence_level: float = None, return_split: bool = True ): fevals, masked_values = self._get_curve_over_fevals_values_in_range(fevals_range) @@ -687,10 +690,12 @@ def get_curve_over_fevals( # noqa: D102 assert np.all(~np.isnan(curve_lower_err)), f"NaNs at {np.nonzero(np.isnan(curve_lower_err))[0]}" assert np.all(~np.isnan(curve_upper_err)), f"NaNs at {np.nonzero(np.isnan(curve_upper_err))[0]}" - # return the curves split in real and fictional - return self._get_curve_split_real_fictional_parts( - real_stopping_point_index + 1, fevals_range, curve, curve_lower_err, curve_upper_err - ) + if return_split: + # return the curves split in real and fictional + return self._get_curve_split_real_fictional_parts( + real_stopping_point_index + 1, fevals_range, curve, curve_lower_err, curve_upper_err + ) + return real_stopping_point_index + 1, fevals_range, curve, curve_lower_err, curve_upper_err def 
_get_curve_over_time_values_in_range( self, time_range: np.ndarray, return_1d=True @@ -772,7 +777,7 @@ def _get_curve_over_time_values_in_range( return times, values, real_stopping_point_time, num_fevals, num_repeats def get_curve_over_time( # noqa: D102 - self, time_range: np.ndarray, dist: np.ndarray = None, confidence_level: float = None, use_bagging=True + self, time_range: np.ndarray, dist: np.ndarray = None, confidence_level: float = None, return_split: bool = True, use_bagging=True ): # check the distribution if dist is None: @@ -856,9 +861,11 @@ def get_curve_over_time( # noqa: D102 curve_lower_err[real_stopping_point_index:] = curve_lower_err[real_stopping_point_index] curve_upper_err[real_stopping_point_index:] = curve_upper_err[real_stopping_point_index] - return self._get_curve_split_real_fictional_parts( - real_stopping_point_index, time_range, curve, curve_lower_err, curve_upper_err - ) + if return_split: + return self._get_curve_split_real_fictional_parts( + real_stopping_point_index, time_range, curve, curve_lower_err, curve_upper_err + ) + return real_stopping_point_index, time_range, curve, curve_lower_err, curve_upper_err def get_split_times( # noqa: D102 self, range: np.ndarray, x_type: str, searchspace_stats: SearchspaceStatistics From ef94bfb0a9edb56b21062570da49ec4917180e5e Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 27 Jun 2025 01:28:03 +0200 Subject: [PATCH 189/234] Implemented head2head comparison data collection for comparing strategies performance or time differences in a head-to-head manner at specific time points --- .../schemas/experiments.json | 27 ++++- .../visualize_experiments.py | 109 +++++++++++++++++- 2 files changed, 132 insertions(+), 4 deletions(-) diff --git a/src/autotuning_methodology/schemas/experiments.json b/src/autotuning_methodology/schemas/experiments.json index 8d4b93c..65b48be 100755 --- a/src/autotuning_methodology/schemas/experiments.json +++ b/src/autotuning_methodology/schemas/experiments.json @@ 
-324,7 +324,8 @@ "enum": [ "line", "scatter", - "heatmap" + "heatmap", + "head2head" ] }, "tmin": { @@ -368,6 +369,30 @@ "description": "Whether to allow values below vmin to be capped at vmin in the heatmap (must be reported if applied).", "type": "boolean" }, + "comparison": { + "description": "For head2head plots, the description of how to compare the search strategies.", + "type": "object", + "required": [ + "relative_time", + "unit" + ], + "properties": { + "relative_time": { + "description": "The relative time at which to compare the search strategies.", + "type": "number", + "exclusiveMinimum": 0, + "inclusiveMaximum": 1 + }, + "unit": { + "description": "The unit of the value with which to compare the search strategies (difference in time or performance).", + "type": "string", + "enum": [ + "objective", + "time" + ] + } + } + }, "include_y_labels": { "description": "Whether to show the y-axis labels on the heatmap. Displayed if not set.", "type": "boolean" diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 805160b..76f0244 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -833,10 +833,50 @@ def norm_color_val(v): vmin: float = plot.get("vmin", None) # visual range lower limit if scope != "aggregate": continue - if style != "line": - raise NotImplementedError(f"{scope} currently only supports 'line' as a style, not {style}") + if style != "line" and style != "head2head": + raise NotImplementedError(f"{scope} does currently not support {style}, create an issue to request it.") + if style == "head2head": + compare_at_relative_time = plot["comparison"]["relative_time"] + comparison_unit = plot["comparison"]["unit"] + + # the comparison data will be a double nested dictionary of the strategy indices + comparison_data = dict() + for strategy_index_alpha in range(len(self.strategies)): + 
comparison_data[strategy_index_alpha] = dict() + for strategy_index_beta in range(len(self.strategies)): + comparison_data[strategy_index_alpha][strategy_index_beta] = list() + + # iterate over the searchspaces and strategies to get head2head data + for gpu_name in self.experiment["experimental_groups_defaults"]["gpus"]: + for application_name in self.experiment["experimental_groups_defaults"]["applications_names"]: + print(f" | visualizing head2head of {application_name} for {gpu_name}") + + # unpack the aggregation data + _, strategies_curves, searchspace_stats, time_range, _ = aggregation_data[ + get_aggregation_data_key(gpu_name=gpu_name, application_name=application_name) + ] + + # get the head2head comparison data + comparison_data_ss = self.get_head2head_comparison_data( + "time", + compare_at_relative_time, + comparison_unit, + searchspace_stats, + strategies_curves, + time_range, + ) + + # for this searchspace, append each strategy's data to the comparison data + for strategy_index_alpha in range(len(self.strategies)): + for strategy_index_beta in range(len(self.strategies)): + comparison_data[strategy_index_alpha][strategy_index_beta].append( + comparison_data_ss[strategy_index_alpha][strategy_index_beta] + ) + + raise ValueError(comparison_data) + # plot the aggregation - if continue_after_comparison or not (compare_baselines or compare_split_times): + if style == "line" and (continue_after_comparison or not (compare_baselines or compare_split_times)): fig, axs = plt.subplots( ncols=1, figsize=(7.5, 4.4), dpi=300 ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. @@ -1146,6 +1186,69 @@ def plot_split_times_bar_comparison( else: plt.show() + def get_head2head_comparison_data( + self, + x_type: str, + compare_at_relative_time: float, + comparison_unit: str, + searchspace_stats: SearchspaceStatistics, + strategies_curves: list[Curve], + x_axis_range: np.ndarray, + ): + """Gets the data for a head-to-head comparison of strategies. 
+ + Args: + x_type: the type of ``x_axis_range``. + compare_at_relative_time: the relative point in time to compare at, between 0.0 and 1.0. + comparison_unit: the unit to compare with, 'time' or 'objective'. + searchspace_stats: the Searchspace statistics object. + strategies_curves: the strategy curves to draw in the plot. + x_axis_range: the time or function evaluations range to plot on. + + Returns: + A doubly-nested dictionary with strategy names as keys and how much better outer performs relative to inner. + """ + comparison_point = x_axis_range[-1] * compare_at_relative_time + comparison_data = dict() + confidence_level = 0.95 # irrelevant because the confidence intervals are not used + dist = searchspace_stats.objective_performances_total_sorted + for strategy_index_alpha, strategy_alpha in enumerate(self.strategies): + inner_comparison_data = dict() + strategy_curve_alpha = strategies_curves[strategy_index_alpha] + _, time_range_alpha, curve_alpha, _, _ = strategy_curve_alpha.get_curve(x_axis_range, x_type, dist=dist, confidence_level=confidence_level, return_split=False) + + # find the index of the closest time and performance to the comparison point + closest_index_alpha = np.argmin(np.abs(time_range_alpha - comparison_point)) + time_at_comparison_alpha = time_range_alpha[closest_index_alpha] + performance_at_comparison_alpha = curve_alpha[closest_index_alpha] + + # compare against all other strategies + for strategy_index_beta, strategy_beta in enumerate(self.strategies): + if strategy_index_alpha == strategy_index_beta: + inner_comparison_data[strategy_index_beta] = np.nan + continue + strategy_curve_beta = strategies_curves[strategy_index_beta] + _, time_range_beta, curve_beta, _, _ = strategy_curve_beta.get_curve(x_axis_range, x_type, dist=dist, confidence_level=confidence_level, return_split=False) + + # calculate the relative difference between the two strategies at the comparison point + if comparison_unit == "time": + # given the performance at 
`compare_at_relative_time`, how much longer does strategy beta take to get to the same performance compared to strategy alpha? + closest_index_beta = np.argmin(np.abs(curve_beta - performance_at_comparison_alpha)) + time_at_comparison_beta = time_range_beta[closest_index_beta] + # outer takes X% of the time inner takes to reach the same performance + inner_comparison_data[strategy_index_beta] = (time_at_comparison_alpha / time_at_comparison_beta) * 100 + elif comparison_unit == "objective": + # given the time at `compare_at_relative_time`, how much worse is the objective value of strategy beta at that moment compared to strategy alpha? + closest_index_beta = np.argmin(np.abs(time_range_beta - time_at_comparison_alpha)) + performance_at_comparison_beta = curve_beta[closest_index_beta] + # outer performance is X% of inner at the same time + inner_comparison_data[strategy_index_beta] = (performance_at_comparison_alpha / performance_at_comparison_beta) * 100 + else: + raise ValueError(f"Invalid comparison unit: {comparison_unit}. 
Expected 'time' or 'objective'.") + + comparison_data[strategy_index_alpha] = inner_comparison_data + return comparison_data + def plot_strategies( self, style: str, From 5f63d414d3ca618174e36e7c5b4d4818f54f3ab0 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 27 Jun 2025 10:19:55 +0200 Subject: [PATCH 190/234] Applied clipping on the index values to avoid out-of-bounds errors --- src/autotuning_methodology/curves.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/autotuning_methodology/curves.py b/src/autotuning_methodology/curves.py index e636fc2..c3be141 100644 --- a/src/autotuning_methodology/curves.py +++ b/src/autotuning_methodology/curves.py @@ -639,6 +639,8 @@ def get_curve_over_fevals( # noqa: D102 # get the confidence interval indices_lower_err, indices_upper_err = self.get_confidence_interval(indices, confidence_level) indices_lower_err, indices_upper_err = indices_lower_err.astype(int), indices_upper_err.astype(int) + indices_lower_err = np.clip(indices_lower_err, a_min=0, a_max=dist.shape[0] - 1) + indices_upper_err = np.clip(indices_upper_err, a_min=0, a_max=dist.shape[0] - 1) # obtain the curves by looking up the associated values curve = dist[indices_mean] curve_lower_err = dist[indices_lower_err] From 273b341a570e3a3b1cfd0caceac2bdd8c22693f3 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 27 Jun 2025 11:30:11 +0200 Subject: [PATCH 191/234] Implemented the plotting of head2head comparisons --- .../visualize_experiments.py | 132 +++++++++++++----- 1 file changed, 100 insertions(+), 32 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 76f0244..939561d 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -840,39 +840,69 @@ def norm_color_val(v): comparison_unit = plot["comparison"]["unit"] # the comparison data will be a double nested dictionary of the strategy indices - 
comparison_data = dict() - for strategy_index_alpha in range(len(self.strategies)): - comparison_data[strategy_index_alpha] = dict() - for strategy_index_beta in range(len(self.strategies)): - comparison_data[strategy_index_alpha][strategy_index_beta] = list() + comparison_data_raw = self.get_head2head_comparison_data(aggregation_data, compare_at_relative_time, comparison_unit) - # iterate over the searchspaces and strategies to get head2head data - for gpu_name in self.experiment["experimental_groups_defaults"]["gpus"]: - for application_name in self.experiment["experimental_groups_defaults"]["applications_names"]: - print(f" | visualizing head2head of {application_name} for {gpu_name}") + # convert the comparison data dictionary to a 2D numpy array of means + comparison_data = np.array( + [[np.mean(comparison_data_raw[strategy1][strategy2]) for strategy2 in comparison_data_raw[strategy1].keys()] + for strategy1 in comparison_data_raw.keys()] + ) - # unpack the aggregation data - _, strategies_curves, searchspace_stats, time_range, _ = aggregation_data[ - get_aggregation_data_key(gpu_name=gpu_name, application_name=application_name) - ] - - # get the head2head comparison data - comparison_data_ss = self.get_head2head_comparison_data( - "time", - compare_at_relative_time, - comparison_unit, - searchspace_stats, - strategies_curves, - time_range, - ) + # set up the plot + fig, axs = plt.subplots(ncols=1, figsize=(8, 6), dpi=300) + if not hasattr(axs, "__len__"): + axs = [axs] + ax = axs[0] + title = f"Head-to-head comparison of strategies at {compare_at_relative_time} relative time" + fig.canvas.manager.set_window_title(title) + if not save_figs: + fig.suptitle(title) - # for this searchspace, append each strategy's data to the comparison data - for strategy_index_alpha in range(len(self.strategies)): - for strategy_index_beta in range(len(self.strategies)): - comparison_data[strategy_index_alpha][strategy_index_beta].append( - 
comparison_data_ss[strategy_index_alpha][strategy_index_beta] - ) + # set the x and y labels + if comparison_unit == "time": + ax.set_xlabel("how much time do these strategies take...") + elif comparison_unit == "objective": + ax.set_xlabel("how much objective value do these strategies achieve...") + ax.set_ylabel("...relative to these strategies") + + # set the x and y ticks + x_ticks = list(comparison_data_raw.keys()) + y_ticks = list(comparison_data_raw.keys()) + # Show all ticks and label them with the respective list entries + ax.set_xticks(range(len(x_ticks)), labels=x_ticks, rotation=15, ha="right", rotation_mode="anchor") + ax.set_yticks(range(len(y_ticks)), labels=y_ticks) + + # plot the comparison data + im = ax.imshow( + comparison_data, + vmin=0.0, + aspect="auto", + ) + cbar = ax.figure.colorbar(im, ax=ax) + cbar.ax.set_ylabel("Difference in time to same objective value (lower is better)", rotation=-90, va="bottom") + if comparison_unit == "objective": + # TODO implement the case for comparison_unit == "objective", check whether it works correctly independent of optimization direction + raise NotImplementedError("Objective value comparison not implemented yet") + + # loop over data dimensions and create text annotations. 
+ for i in range(len(x_ticks)): + for j in range(len(y_ticks)): + number = comparison_data[i, j] + if np.isnan(number): + continue + print(f"{j},{i}: {round(number, 1)}%") + text = ax.text(j, i, f"{round(number, 1)}%", ha="center", va="center", color="black") + print(text) + # finalize the figure and save or display it + fig.tight_layout() + if save_figs: + filename_path = Path(self.plot_filename_prefix) / "head2head_comparison" + fig.savefig(filename_path, dpi=300, bbox_inches="tight", pad_inches=0.01) + print(f"Figure saved to {filename_path}") + else: + plt.show() + raise ValueError(comparison_data) # plot the aggregation @@ -1186,7 +1216,45 @@ def plot_split_times_bar_comparison( else: plt.show() - def get_head2head_comparison_data( + def get_head2head_comparison_data(self, aggregation_data: dict, compare_at_relative_time: float, comparison_unit: str) -> dict: + """Gets the data for a head-to-head comparison of strategies across all searchspaces.""" + # the comparison data will be a double nested dictionary of the strategy indices + comparison_data = dict() + for strategy_alpha in self.strategies: + comparison_data[strategy_alpha['display_name']] = dict() + for strategy_beta in self.strategies: + comparison_data[strategy_alpha['display_name']][strategy_beta['display_name']] = list() + + # iterate over the searchspaces and strategies to get head2head data + for gpu_name in self.experiment["experimental_groups_defaults"]["gpus"]: + for application_name in self.experiment["experimental_groups_defaults"]["applications_names"]: + print(f" | visualizing head2head of {application_name} for {gpu_name}") + + # unpack the aggregation data + _, strategies_curves, searchspace_stats, time_range, _ = aggregation_data[ + get_aggregation_data_key(gpu_name=gpu_name, application_name=application_name) + ] + + # get the head2head comparison data + comparison_data_ss = self.get_head2head_comparison_data_searchspace( + "time", + compare_at_relative_time, + comparison_unit, + 
searchspace_stats, + strategies_curves, + time_range, + ) + + # for this searchspace, append each strategy's data to the comparison data + for strategy_index_alpha, strategy_alpha in enumerate(self.strategies): + for strategy_index_beta, strategy_beta in enumerate(self.strategies): + comparison_data[strategy_alpha['display_name']][strategy_beta['display_name']].append( + comparison_data_ss[strategy_index_alpha][strategy_index_beta] + ) + + return comparison_data + + def get_head2head_comparison_data_searchspace( self, x_type: str, compare_at_relative_time: float, @@ -1194,8 +1262,8 @@ def get_head2head_comparison_data( searchspace_stats: SearchspaceStatistics, strategies_curves: list[Curve], x_axis_range: np.ndarray, - ): - """Gets the data for a head-to-head comparison of strategies. + ) -> dict: + """Gets the data for a head-to-head comparison of strategies on a specific searchspace. Args: x_type: the type of ``x_axis_range``. From bc77f62814f2be15cac978994d61344422900d81 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 27 Jun 2025 12:17:19 +0200 Subject: [PATCH 192/234] Implemented proper color mapping and color bar to head2head plot --- .../visualize_experiments.py | 50 +++++++++++++++++-- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 939561d..bbbe3c7 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -5,6 +5,7 @@ import warnings from collections import defaultdict from pathlib import Path +from math import ceil import matplotlib.pyplot as plt import numpy as np @@ -846,7 +847,7 @@ def norm_color_val(v): comparison_data = np.array( [[np.mean(comparison_data_raw[strategy1][strategy2]) for strategy2 in comparison_data_raw[strategy1].keys()] for strategy1 in comparison_data_raw.keys()] - ) + ).transpose() # set up the plot fig, axs = plt.subplots(ncols=1, 
figsize=(8, 6), dpi=300) @@ -872,13 +873,53 @@ def norm_color_val(v): ax.set_xticks(range(len(x_ticks)), labels=x_ticks, rotation=15, ha="right", rotation_mode="anchor") ax.set_yticks(range(len(y_ticks)), labels=y_ticks) + # set the color map + vmin = 0.0 + vmax = 1000.0 + def norm_color_val(v): + """Normalize a color value to fit in the 0-1 range.""" + return (v - vmin) / (vmax - vmin) + + cmap = LinearSegmentedColormap.from_list( + "head2head_colormap", + [ + (norm_color_val(vmin), "darkgreen"), + (norm_color_val(100.0), "greenyellow"), + (norm_color_val(200.0), "orange"), + (norm_color_val(500.0), "red"), + (norm_color_val(vmax), "darkred"), + # (norm_color_val(vmax), "black"), + ], + ) + + # if there are any values above the vmax, warn + if np.any(comparison_data > vmax): + warn(f"There are values above the vmax ({vmax}) in the comparison data: {comparison_data[comparison_data > vmax]}, these are clipped") + # clip the comparison data to the vmin-vmax range + comparison_data_clipped = np.clip(comparison_data, vmin, vmax) + # plot the comparison data im = ax.imshow( - comparison_data, - vmin=0.0, + comparison_data_clipped, + vmin=vmin, + vmax=vmax, aspect="auto", + cmap=cmap, ) + + # set the colorbar + # cmin = np.nanmin(comparison_data_clipped) + cmin = vmin # always show 0.0 as the start + max_val = np.nanmax(comparison_data_clipped) + # round to the nearest 100 + cmax = round(ceil(max_val), -2) + if cmax < max_val: + cmax += 100 # ensure the colorbar max is above the max value + cnum = round(cmax / 100) + 1 cbar = ax.figure.colorbar(im, ax=ax) + if cmin != vmin or cmax != vmax: + cbar.set_ticks(np.linspace(cmin, cmax, num=cnum)) # set colorbar limits + cbar.ax.set_ylim(cmin, cmax) # adjust visible colorbar limits cbar.ax.set_ylabel("Difference in time to same objective value (lower is better)", rotation=-90, va="bottom") if comparison_unit == "objective": # TODO implement the case for comparison_unit == "objective", check whether it works correctly 
independent of optimization direction @@ -891,8 +932,7 @@ def norm_color_val(v): if np.isnan(number): continue print(f"{j},{i}: {round(number, 1)}%") - text = ax.text(j, i, f"{round(number, 1)}%", ha="center", va="center", color="black") - print(text) + text = ax.text(j, i, f"{round(number, 1)}%", ha="center", va="center", color="white") # finalize the figure and save or display it fig.tight_layout() From 92e5083d01b9ad6a0db4d63e96751662fd7c7d10 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 27 Jun 2025 12:27:21 +0200 Subject: [PATCH 193/234] Placed head2head labels and ticks on top --- src/autotuning_methodology/visualize_experiments.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index bbbe3c7..faffbc0 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -861,17 +861,19 @@ def norm_color_val(v): # set the x and y labels if comparison_unit == "time": - ax.set_xlabel("how much time do these strategies take...") + ax.set_xlabel("How much time do these strategies take...") elif comparison_unit == "objective": - ax.set_xlabel("how much objective value do these strategies achieve...") + ax.set_xlabel("How much objective value do these strategies achieve...") ax.set_ylabel("...relative to these strategies") + ax.xaxis.set_label_position('top') # set the x and y ticks x_ticks = list(comparison_data_raw.keys()) y_ticks = list(comparison_data_raw.keys()) # Show all ticks and label them with the respective list entries - ax.set_xticks(range(len(x_ticks)), labels=x_ticks, rotation=15, ha="right", rotation_mode="anchor") + ax.set_xticks(range(len(x_ticks)), labels=x_ticks, rotation=-10, ha="right", rotation_mode="anchor") ax.set_yticks(range(len(y_ticks)), labels=y_ticks) + ax.xaxis.tick_top() # set the color map vmin = 0.0 @@ -931,7 +933,6 @@ def 
norm_color_val(v): number = comparison_data[i, j] if np.isnan(number): continue - print(f"{j},{i}: {round(number, 1)}%") text = ax.text(j, i, f"{round(number, 1)}%", ha="center", va="center", color="white") # finalize the figure and save or display it @@ -942,8 +943,6 @@ def norm_color_val(v): print(f"Figure saved to {filename_path}") else: plt.show() - - raise ValueError(comparison_data) # plot the aggregation if style == "line" and (continue_after_comparison or not (compare_baselines or compare_split_times)): From 3abcfee62d1306b6933c989938e49339ba049472 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 27 Jun 2025 17:31:57 +0200 Subject: [PATCH 194/234] Implemented objective part of head2head aggregate comparison, improved calculation using percentage change --- .../visualize_experiments.py | 103 ++++++++++++------ 1 file changed, 70 insertions(+), 33 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index faffbc0..cb35298 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -845,7 +845,7 @@ def norm_color_val(v): # convert the comparison data dictionary to a 2D numpy array of means comparison_data = np.array( - [[np.mean(comparison_data_raw[strategy1][strategy2]) for strategy2 in comparison_data_raw[strategy1].keys()] + [[np.nanmean(comparison_data_raw[strategy1][strategy2]) for strategy2 in comparison_data_raw[strategy1].keys()] for strategy1 in comparison_data_raw.keys()] ).transpose() @@ -859,20 +859,20 @@ def norm_color_val(v): if not save_figs: fig.suptitle(title) - # set the x and y labels - if comparison_unit == "time": - ax.set_xlabel("How much time do these strategies take...") - elif comparison_unit == "objective": - ax.set_xlabel("How much objective value do these strategies achieve...") - ax.set_ylabel("...relative to these strategies") - ax.xaxis.set_label_position('top') + # # set the x and 
y labels + # if comparison_unit == "time": + # ax.set_xlabel("How much time do these strategies take...") + # elif comparison_unit == "objective": + # ax.set_xlabel("How much objective value do these strategies achieve...") + # ax.set_ylabel("...relative to these strategies?") + # ax.xaxis.set_label_position('top') # set the x and y ticks x_ticks = list(comparison_data_raw.keys()) y_ticks = list(comparison_data_raw.keys()) # Show all ticks and label them with the respective list entries ax.set_xticks(range(len(x_ticks)), labels=x_ticks, rotation=-10, ha="right", rotation_mode="anchor") - ax.set_yticks(range(len(y_ticks)), labels=y_ticks) + ax.set_yticks(range(len(y_ticks)), labels=y_ticks, rotation=-30, ha="right", rotation_mode="anchor") ax.xaxis.tick_top() # set the color map @@ -882,21 +882,33 @@ def norm_color_val(v): """Normalize a color value to fit in the 0-1 range.""" return (v - vmin) / (vmax - vmin) - cmap = LinearSegmentedColormap.from_list( - "head2head_colormap", - [ - (norm_color_val(vmin), "darkgreen"), - (norm_color_val(100.0), "greenyellow"), - (norm_color_val(200.0), "orange"), - (norm_color_val(500.0), "red"), - (norm_color_val(vmax), "darkred"), - # (norm_color_val(vmax), "black"), - ], - ) + if comparison_unit == "time": + cmap = LinearSegmentedColormap.from_list( + "head2head_colormap", + [ + (norm_color_val(vmin), "darkgreen"), + (norm_color_val(100.0), "greenyellow"), + (norm_color_val(200.0), "orange"), + (norm_color_val(500.0), "red"), + (norm_color_val(vmax), "darkred"), + # (norm_color_val(vmax), "black"), + ], + ) + elif comparison_unit == "objective": + cmap = LinearSegmentedColormap.from_list( + "head2head_colormap", + [ + (norm_color_val(vmin), "darkred"), + (norm_color_val(80.0), "yellow"), + (norm_color_val(100.0), "greenyellow"), + (norm_color_val(200.0), "green"), + (norm_color_val(vmax), "darkgreen"), + ], + ) # if there are any values above the vmax, warn if np.any(comparison_data > vmax): - warn(f"There are values above the 
vmax ({vmax}) in the comparison data: {comparison_data[comparison_data > vmax]}, these are clipped") + warnings.warn(f"There are values above the vmax ({vmax}) in the comparison data: {comparison_data[comparison_data > vmax]}, these are clipped") # clip the comparison data to the vmin-vmax range comparison_data_clipped = np.clip(comparison_data, vmin, vmax) @@ -922,10 +934,12 @@ def norm_color_val(v): if cmin != vmin or cmax != vmax: cbar.set_ticks(np.linspace(cmin, cmax, num=cnum)) # set colorbar limits cbar.ax.set_ylim(cmin, cmax) # adjust visible colorbar limits - cbar.ax.set_ylabel("Difference in time to same objective value (lower is better)", rotation=-90, va="bottom") - if comparison_unit == "objective": - # TODO implement the case for comparison_unit == "objective", check whether it works correctly independent of optimization direction - raise NotImplementedError("Objective value comparison not implemented yet") + if comparison_unit == "time": + cbar.ax.set_ylabel("Percentage difference in time to same objective value (lower is better)", rotation=-90, va="bottom") + elif comparison_unit == "objective": + cbar.ax.set_ylabel("Percentage difference in objective value at same time (higher is better)", rotation=-90, va="bottom") + else: + raise NotImplementedError(f"Comparison unit '{comparison_unit}' not implemented") # loop over data dimensions and create text annotations. 
for i in range(len(x_ticks)): @@ -938,7 +952,7 @@ def norm_color_val(v): # finalize the figure and save or display it fig.tight_layout() if save_figs: - filename_path = Path(self.plot_filename_prefix) / "head2head_comparison" + filename_path = Path(self.plot_filename_prefix) / f"head2head_comparison_{comparison_unit}" fig.savefig(filename_path, dpi=300, bbox_inches="tight", pad_inches=0.01) print(f"Figure saved to {filename_path}") else: @@ -1318,6 +1332,7 @@ def get_head2head_comparison_data_searchspace( comparison_point = x_axis_range[-1] * compare_at_relative_time comparison_data = dict() confidence_level = 0.95 # irrelevant because the confidence intervals are not used + minimization = searchspace_stats.minimization dist = searchspace_stats.objective_performances_total_sorted for strategy_index_alpha, strategy_alpha in enumerate(self.strategies): inner_comparison_data = dict() @@ -1329,6 +1344,11 @@ def get_head2head_comparison_data_searchspace( time_at_comparison_alpha = time_range_alpha[closest_index_alpha] performance_at_comparison_alpha = curve_alpha[closest_index_alpha] + absolute_optimum = searchspace_stats.total_performance_absolute_optimum() + median = searchspace_stats.total_performance_median() + normalize = lambda val: (val - median) / (absolute_optimum - median) + performance_at_comparison_alpha_norm = normalize(performance_at_comparison_alpha) + # compare against all other strategies for strategy_index_beta, strategy_beta in enumerate(self.strategies): if strategy_index_alpha == strategy_index_beta: @@ -1339,17 +1359,34 @@ def get_head2head_comparison_data_searchspace( # calculate the relative difference between the two strategies at the comparison point if comparison_unit == "time": - # given the performance at `compare_at_relative_time`, how much longer does strategy beta take to get to the same performance compared to strategy alpha? 
- closest_index_beta = np.argmin(np.abs(curve_beta - performance_at_comparison_alpha)) + # given the performance at `compare_at_relative_time`, what is the index of the first time that strategy beta reaches at least the same performance? + index_matching = np.argwhere(curve_beta <= performance_at_comparison_alpha) if minimization else np.argwhere(curve_beta >= performance_at_comparison_alpha) + if index_matching.size == 0: + # if strategy beta never reaches the performance of strategy alpha, we cannot compare + inner_comparison_data[strategy_index_beta] = np.nan + continue + # get the time at which strategy beta reaches the performance of strategy alpha + closest_index_beta = index_matching[0][0] # take the first match time_at_comparison_beta = time_range_beta[closest_index_beta] - # outer takes X% of the time inner takes to reach the same performance - inner_comparison_data[strategy_index_beta] = (time_at_comparison_alpha / time_at_comparison_beta) * 100 + + # given the performance at `compare_at_relative_time`, how much longer does strategy beta take to get to the same performance compared to strategy alpha? (lower is better) + # closest_index_beta = np.argmin(np.abs(curve_beta - performance_at_comparison_alpha)) + # time_at_comparison_beta = time_range_beta[closest_index_beta] + # outer takes X% of the time inner takes to reach the same performance (100%+percentage change) + percentage_change = (time_at_comparison_alpha - time_at_comparison_beta) / abs(time_at_comparison_beta) * 100 + inner_comparison_data[strategy_index_beta] = 100 + percentage_change elif comparison_unit == "objective": - # given the time at `compare_at_relative_time`, how much worse is the objective value of strategy beta at that moment compared to strategy alpha? + # given the time at `compare_at_relative_time`, how much worse is the objective value of strategy beta at that moment compared to strategy alpha? 
(higher is better) closest_index_beta = np.argmin(np.abs(time_range_beta - time_at_comparison_alpha)) performance_at_comparison_beta = curve_beta[closest_index_beta] - # outer performance is X% of inner at the same time - inner_comparison_data[strategy_index_beta] = (performance_at_comparison_alpha / performance_at_comparison_beta) * 100 + performance_at_comparison_beta_norm = normalize(performance_at_comparison_beta) + + # percentage_change = (performance_at_comparison_beta - performance_at_comparison_alpha) / abs(performance_at_comparison_beta) * 100 + # if not minimization: + # percentage_change = -percentage_change + + percentage_change_norm = (performance_at_comparison_beta_norm - performance_at_comparison_alpha_norm) / abs(performance_at_comparison_beta_norm) * 100 + inner_comparison_data[strategy_index_beta] = 100 + percentage_change_norm else: raise ValueError(f"Invalid comparison unit: {comparison_unit}. Expected 'time' or 'objective'.") From fc1b7425eeb9799205a199cb003f6f93e6da7045 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 27 Jun 2025 23:52:28 +0200 Subject: [PATCH 195/234] Implemented adding means under head2head heatmap, additional improvements --- .../visualize_experiments.py | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index cb35298..c212a10 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -871,7 +871,7 @@ def norm_color_val(v): x_ticks = list(comparison_data_raw.keys()) y_ticks = list(comparison_data_raw.keys()) # Show all ticks and label them with the respective list entries - ax.set_xticks(range(len(x_ticks)), labels=x_ticks, rotation=-10, ha="right", rotation_mode="anchor") + ax.set_xticks(range(len(x_ticks)), labels=x_ticks, rotation=-15, ha="right", rotation_mode="anchor") ax.set_yticks(range(len(y_ticks)), 
labels=y_ticks, rotation=-30, ha="right", rotation_mode="anchor") ax.xaxis.tick_top() @@ -890,8 +890,8 @@ def norm_color_val(v): (norm_color_val(100.0), "greenyellow"), (norm_color_val(200.0), "orange"), (norm_color_val(500.0), "red"), - (norm_color_val(vmax), "darkred"), - # (norm_color_val(vmax), "black"), + (norm_color_val(800.0), "darkred"), + (norm_color_val(vmax), "black"), ], ) elif comparison_unit == "objective": @@ -935,19 +935,29 @@ def norm_color_val(v): cbar.set_ticks(np.linspace(cmin, cmax, num=cnum)) # set colorbar limits cbar.ax.set_ylim(cmin, cmax) # adjust visible colorbar limits if comparison_unit == "time": - cbar.ax.set_ylabel("Percentage difference in time to same objective value (lower is better)", rotation=-90, va="bottom") + cbar.ax.set_ylabel(f"Percentage difference in time to same objective value{chr(10) if len(y_ticks) < 10 else ' '}(lower is better)", rotation=-90, va="bottom") elif comparison_unit == "objective": - cbar.ax.set_ylabel("Percentage difference in objective value at same time (higher is better)", rotation=-90, va="bottom") + cbar.ax.set_ylabel(f"Percentage difference in objective value at same time{chr(10) if len(y_ticks) < 10 else ' '}(higher is better)", rotation=-90, va="bottom") else: raise NotImplementedError(f"Comparison unit '{comparison_unit}' not implemented") - # loop over data dimensions and create text annotations. 
+ # loop over data dimensions and create text annotations for i in range(len(x_ticks)): for j in range(len(y_ticks)): number = comparison_data[i, j] if np.isnan(number): continue - text = ax.text(j, i, f"{round(number, 1)}%", ha="center", va="center", color="white") + text = ax.text(j, i, f"{round(number, 1) if number < 100 else round(number)}%", ha="center", va="center", color="white") + + # plot the averages per strategy as labels under the heatmap + averages = np.nanmean(comparison_data, axis=0) + # add "mean" before the averages + ax.text(-0.5, len(y_ticks), "Mean:", ha="right", va="center", color="black", fontsize=10) + for i, avg in enumerate(averages): + ax.text( + i, len(y_ticks), f"{round(avg, 1) if avg < 100 else round(avg)}%", ha="center", va="center", color="black" + ) + print(f"Averages per strategy at {compare_at_relative_time} relative time: {[(s, a) for s, a in zip(x_ticks, averages)]}") # finalize the figure and save or display it fig.tight_layout() From 8e3ef1b4d72cb3ecd29d87c87baa0d35acbd4c83 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sat, 28 Jun 2025 21:20:00 +0200 Subject: [PATCH 196/234] Minor improvements to plotting and plot sizes --- src/autotuning_methodology/visualize_experiments.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index c212a10..98abd38 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -364,7 +364,7 @@ def __init__( fig, axs = plt.subplots( nrows=len(plot_y_value_types), ncols=1, - figsize=(8, 3.4 * len(plot_y_value_types)), + figsize=(8, 4.2 * len(plot_y_value_types)), sharex=True, dpi=300, ) @@ -935,9 +935,9 @@ def norm_color_val(v): cbar.set_ticks(np.linspace(cmin, cmax, num=cnum)) # set colorbar limits cbar.ax.set_ylim(cmin, cmax) # adjust visible colorbar limits if comparison_unit == "time": - 
cbar.ax.set_ylabel(f"Percentage difference in time to same objective value{chr(10) if len(y_ticks) < 10 else ' '}(lower is better)", rotation=-90, va="bottom") + cbar.ax.set_ylabel(f"Percentage difference in time to same objective value{chr(10) if len(y_ticks) < 8 else ' '}(lower is better)", rotation=-90, va="bottom") elif comparison_unit == "objective": - cbar.ax.set_ylabel(f"Percentage difference in objective value at same time{chr(10) if len(y_ticks) < 10 else ' '}(higher is better)", rotation=-90, va="bottom") + cbar.ax.set_ylabel(f"Percentage difference in objective value at same time{chr(10) if len(y_ticks) < 8 else ' '}(higher is better)", rotation=-90, va="bottom") else: raise NotImplementedError(f"Comparison unit '{comparison_unit}' not implemented") @@ -947,15 +947,15 @@ def norm_color_val(v): number = comparison_data[i, j] if np.isnan(number): continue - text = ax.text(j, i, f"{round(number, 1) if number < 100 else round(number)}%", ha="center", va="center", color="white") + text = ax.text(j, i, f"{round(number, 1) if number < 100 else round(number)}%", ha="center", va="center", color="white" if number > 200 else "black", fontsize="small") # plot the averages per strategy as labels under the heatmap averages = np.nanmean(comparison_data, axis=0) # add "mean" before the averages - ax.text(-0.5, len(y_ticks), "Mean:", ha="right", va="center", color="black", fontsize=10) + ax.text(-0.5, len(y_ticks)-0.2, "Mean:", ha="right", va="center", color="black", fontsize=10) for i, avg in enumerate(averages): ax.text( - i, len(y_ticks), f"{round(avg, 1) if avg < 100 else round(avg)}%", ha="center", va="center", color="black" + i, len(y_ticks)-0.2, f"{round(avg, 1) if avg < 100 else round(avg)}%", ha="center", va="center", color="black", fontsize="small" ) print(f"Averages per strategy at {compare_at_relative_time} relative time: {[(s, a) for s, a in zip(x_ticks, averages)]}") From 7b6e6117cbeb3847d9024f2134f524af4b39d9b6 Mon Sep 17 00:00:00 2001 From: fjwillemsen 
Date: Sat, 28 Jun 2025 21:33:29 +0200 Subject: [PATCH 197/234] Improvements to the experiment files --- .../compare_constrained_strategies_kt.json | 23 ++- ...mpare_constrained_strategies_pyatf_kt.json | 31 ++++- experiment_files/compare_simple.json | 131 ++++++++++++++++++ 3 files changed, 173 insertions(+), 12 deletions(-) create mode 100644 experiment_files/compare_simple.json diff --git a/experiment_files/compare_constrained_strategies_kt.json b/experiment_files/compare_constrained_strategies_kt.json index 12fe630..b8bf320 100644 --- a/experiment_files/compare_constrained_strategies_kt.json +++ b/experiment_files/compare_constrained_strategies_kt.json @@ -77,8 +77,9 @@ "value": false } ], - "display_name": "Firefly constrained (non-constrained)", - "autotuner": "KernelTuner" + "display_name": "Firefly non-constrained", + "autotuner": "KernelTuner", + "color_parent": "firefly_constrained" }, { "name": "genetic_algorithm_constrained", @@ -89,7 +90,7 @@ "value": true } ], - "display_name": "Genetic Algorithm constrained", + "display_name": "GA constrained", "autotuner": "KernelTuner" }, { @@ -101,7 +102,7 @@ "value": false } ], - "display_name": "Genetic Algorithm non-constrained", + "display_name": "GA non-constrained", "autotuner": "KernelTuner", "color_parent": "genetic_algorithm_constrained" }, @@ -139,11 +140,11 @@ "value": true } ], - "display_name": "Simulated Annealing constrained", + "display_name": "SA constrained", "autotuner": "KernelTuner" }, { - "name": "simulated_annealing_non_constrained", + "name": "simulated_annealing_non_constrained_2", "search_method": "simulated_annealing", "search_method_hyperparameters": [ { @@ -151,7 +152,7 @@ "value": false } ], - "display_name": "Simulated Annealing non-constrained", + "display_name": "SA non-constrained", "autotuner": "KernelTuner", "color_parent": "simulated_annealing_constrained" } @@ -215,6 +216,14 @@ "include_y_labels": false, "include_colorbar": true }, + { + "scope": "aggregate", + "style": 
"head2head", + "comparison": { + "unit": "time", + "relative_time": 0.5 + } + }, { "scope": "aggregate", "style": "line", diff --git a/experiment_files/compare_constrained_strategies_pyatf_kt.json b/experiment_files/compare_constrained_strategies_pyatf_kt.json index 9cfd49e..04cb1bd 100644 --- a/experiment_files/compare_constrained_strategies_pyatf_kt.json +++ b/experiment_files/compare_constrained_strategies_pyatf_kt.json @@ -56,6 +56,18 @@ "ignore_cache": false }, "search_strategies": [ + { + "name": "firefly_constrained", + "search_method": "firefly_algorithm", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "KT Firefly", + "autotuner": "KernelTuner" + }, { "name": "genetic_algorithm_constrained", "search_method": "genetic_algorithm", @@ -65,7 +77,7 @@ "value": true } ], - "display_name": "KT Genetic Algorithm", + "display_name": "KT GA", "autotuner": "KernelTuner" }, { @@ -89,7 +101,7 @@ "value": true } ], - "display_name": "KT Simulated Annealing", + "display_name": "KT SA", "autotuner": "KernelTuner" }, { @@ -105,7 +117,7 @@ "value": true } ], - "display_name": "pyATF Simulated Annealing", + "display_name": "pyATF SA", "autotuner": "KernelTuner" }, { @@ -137,7 +149,7 @@ "value": true } ], - "display_name": "pyATF Differential Evolution", + "display_name": "pyATF DE", "autotuner": "KernelTuner" }, { @@ -226,10 +238,19 @@ "include_y_labels": false, "include_colorbar": true }, + { + "scope": "aggregate", + "style": "head2head", + "comparison": { + "unit": "time", + "relative_time": 0.5 + } + }, { "scope": "aggregate", "style": "line", - "ylabel": "Aggregate performance relative to baseline" + "ylabel": "Aggregate performance relative to baseline", + "vmin": -4.0 } ], "resolution": 1000.0, diff --git a/experiment_files/compare_simple.json b/experiment_files/compare_simple.json new file mode 100644 index 0000000..f46c265 --- /dev/null +++ b/experiment_files/compare_simple.json @@ -0,0 +1,131 @@ +{ + 
"version": "1.2.0", + "name": "Compare constrained strategies pyATF vs KT", + "parent_folder": "/var/scratch/fjwillem/constrained_optimization", + "experimental_groups_defaults": { + "applications": [ + { + "name": "dedispersion_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "dedispersion_milo.json", + "objective_performance_keys": [ + "time" + ] + } + ], + "gpus": [ + "A100", + "A4000" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 100, + "samples": 32, + "minimum_fraction_of_budget_valid": 0.1, + "minimum_number_of_valid_search_iterations": 10, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "genetic_algorithm_constrained", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "KT GA constrained", + "autotuner": "KernelTuner" + }, + { + "name": "genetic_algorithm_non_constrained", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": false + } + ], + "display_name": "KT GA non-constrained", + "autotuner": "KernelTuner", + "color_parent": "genetic_algorithm_constrained" + }, + { + "name": "pso_constrained", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "KT PSO constrained", + "autotuner": "KernelTuner" + }, + { + "name": "pso_non_constrained", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": false + } + ], + "display_name": "KT PSO non-constrained", + "autotuner": "KernelTuner", + "color_parent": "pso_constrained" + }, + { + "name": "simulated_annealing_constrained", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": 
"constraint_aware", + "value": true + } + ], + "display_name": "KT SA constrained", + "autotuner": "KernelTuner" + }, + { + "name": "simulated_annealing_non_constrained", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": false + } + ], + "display_name": "KT SA non-constrained", + "autotuner": "KernelTuner", + "color_parent": "simulated_annealing_constrained" + } + ], + "statistics_settings": { + "cutoff_percentile": 0.95, + "cutoff_percentile_start": 0.01, + "cutoff_type": "time", + "objective_time_keys": [ + "all" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "aggregate", + "style": "head2head", + "comparison": { + "unit": "time", + "relative_time": 0.5 + } + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file From 47e4dcbc3c0de86356ba53b5ebb3e1679124fe32 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sat, 28 Jun 2025 23:33:59 +0200 Subject: [PATCH 198/234] Improved handling of NaNs in head2head comparison --- src/autotuning_methodology/visualize_experiments.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 98abd38..b6affc0 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -843,6 +843,13 @@ def norm_color_val(v): # the comparison data will be a double nested dictionary of the strategy indices comparison_data_raw = self.get_head2head_comparison_data(aggregation_data, compare_at_relative_time, comparison_unit) + # if more than half of the comparisons between two strategies are NaN, set all to NaN + for strategy1 in comparison_data_raw.keys(): + for strategy2 in comparison_data_raw[strategy1].keys(): + comparison = comparison_data_raw[strategy1][strategy2] + 
if len([v for v in comparison if np.isnan(v)]) > ceil(0.5 * len(comparison)): + comparison_data_raw[strategy1][strategy2] = [np.nan] * len(comparison) + # convert the comparison data dictionary to a 2D numpy array of means comparison_data = np.array( [[np.nanmean(comparison_data_raw[strategy1][strategy2]) for strategy2 in comparison_data_raw[strategy1].keys()] @@ -971,7 +978,7 @@ def norm_color_val(v): # plot the aggregation if style == "line" and (continue_after_comparison or not (compare_baselines or compare_split_times)): fig, axs = plt.subplots( - ncols=1, figsize=(7.5, 4.4), dpi=300 + ncols=1, figsize=(6.8, 4.0), dpi=300 ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. if not hasattr(axs, "__len__"): axs = [axs] From 5cdf40c7954e5ad43cbcd67d950ef50c4766cef2 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sun, 29 Jun 2025 13:50:06 +0200 Subject: [PATCH 199/234] In head2head comparison, strategies that never reach the target receive a penalty instead of being set to NaN --- .../visualize_experiments.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index b6affc0..158dfbe 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -1379,16 +1379,18 @@ def get_head2head_comparison_data_searchspace( # given the performance at `compare_at_relative_time`, what is the index of the first time that strategy beta reaches at least the same performance? 
index_matching = np.argwhere(curve_beta <= performance_at_comparison_alpha) if minimization else np.argwhere(curve_beta >= performance_at_comparison_alpha) if index_matching.size == 0: - # if strategy beta never reaches the performance of strategy alpha, we cannot compare - inner_comparison_data[strategy_index_beta] = np.nan - continue - # get the time at which strategy beta reaches the performance of strategy alpha - closest_index_beta = index_matching[0][0] # take the first match - time_at_comparison_beta = time_range_beta[closest_index_beta] - - # given the performance at `compare_at_relative_time`, how much longer does strategy beta take to get to the same performance compared to strategy alpha? (lower is better) - # closest_index_beta = np.argmin(np.abs(curve_beta - performance_at_comparison_alpha)) - # time_at_comparison_beta = time_range_beta[closest_index_beta] + # if strategy beta never reaches the performance of strategy alpha, we cannot compare, instead we penalize it by taking 10x the last time value + time_at_comparison_beta = time_range_beta[-1] * 10 + # inner_comparison_data[strategy_index_beta] = np.nan + # continue + else: + # get the time at which strategy beta reaches the performance of strategy alpha + closest_index_beta = index_matching[0][0] # take the first match + time_at_comparison_beta = time_range_beta[closest_index_beta] + + # given the performance at `compare_at_relative_time`, how much longer does strategy beta take to get to the same performance compared to strategy alpha? 
(lower is better) + # closest_index_beta = np.argmin(np.abs(curve_beta - performance_at_comparison_alpha)) + # time_at_comparison_beta = time_range_beta[closest_index_beta] # outer takes X% of the time inner takes to reach the same performance (100%+percentage change) percentage_change = (time_at_comparison_alpha - time_at_comparison_beta) / abs(time_at_comparison_beta) * 100 inner_comparison_data[strategy_index_beta] = 100 + percentage_change From 9cc1ecd350ad2645aafee4889645b7e1851a0ec9 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sun, 29 Jun 2025 14:24:04 +0200 Subject: [PATCH 200/234] Implemented text annotation options for heatmaps --- .../compare_constrained_strategies_kt.json | 9 ++++--- ...mpare_constrained_strategies_pyatf_kt.json | 9 ++++--- experiment_files/compare_simple.json | 3 ++- .../schemas/experiments.json | 5 ++++ .../visualize_experiments.py | 24 ++++++++++++++----- 5 files changed, 37 insertions(+), 13 deletions(-) diff --git a/experiment_files/compare_constrained_strategies_kt.json b/experiment_files/compare_constrained_strategies_kt.json index b8bf320..c2658f5 100644 --- a/experiment_files/compare_constrained_strategies_kt.json +++ b/experiment_files/compare_constrained_strategies_kt.json @@ -200,7 +200,8 @@ ], "cmin": -8.0, "include_y_labels": true, - "include_colorbar": false + "include_colorbar": false, + "annotate": true }, { "scope": "search_strategy", @@ -214,7 +215,8 @@ "cmin": -8.0, "cnum": 10, "include_y_labels": false, - "include_colorbar": true + "include_colorbar": true, + "annotate": true }, { "scope": "aggregate", @@ -222,7 +224,8 @@ "comparison": { "unit": "time", "relative_time": 0.5 - } + }, + "annotate": true }, { "scope": "aggregate", diff --git a/experiment_files/compare_constrained_strategies_pyatf_kt.json b/experiment_files/compare_constrained_strategies_pyatf_kt.json index 04cb1bd..b032289 100644 --- a/experiment_files/compare_constrained_strategies_pyatf_kt.json +++ 
b/experiment_files/compare_constrained_strategies_pyatf_kt.json @@ -222,7 +222,8 @@ ], "cmin": -8.0, "include_y_labels": true, - "include_colorbar": false + "include_colorbar": false, + "annotate": true }, { "scope": "search_strategy", @@ -236,7 +237,8 @@ "cmin": -8.0, "cnum": 10, "include_y_labels": false, - "include_colorbar": true + "include_colorbar": true, + "annotate": true }, { "scope": "aggregate", @@ -244,7 +246,8 @@ "comparison": { "unit": "time", "relative_time": 0.5 - } + }, + "annotate": true }, { "scope": "aggregate", diff --git a/experiment_files/compare_simple.json b/experiment_files/compare_simple.json index f46c265..372e529 100644 --- a/experiment_files/compare_simple.json +++ b/experiment_files/compare_simple.json @@ -120,7 +120,8 @@ "comparison": { "unit": "time", "relative_time": 0.5 - } + }, + "annotate": true } ], "resolution": 1000.0, diff --git a/src/autotuning_methodology/schemas/experiments.json b/src/autotuning_methodology/schemas/experiments.json index 65b48be..c934f98 100755 --- a/src/autotuning_methodology/schemas/experiments.json +++ b/src/autotuning_methodology/schemas/experiments.json @@ -393,6 +393,11 @@ } } }, + "annotate": { + "description": "Whether to annotate the heatmaps with the values in their cells.", + "type": "boolean", + "default": true + }, "include_y_labels": { "description": "Whether to show the y-axis labels on the heatmap. 
Displayed if not set.", "type": "boolean" diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 158dfbe..706438f 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -417,6 +417,7 @@ def __init__( ) plot_x_value_types: list[str] = plot["x_axis_value_types"] plot_y_value_types: list[str] = plot["y_axis_value_types"] + annotate: bool = plot.get("annotate", True) assert len(plot_x_value_types) == 1 assert len(plot_y_value_types) == 1 x_type = plot_x_value_types[0] @@ -725,6 +726,15 @@ def norm_color_val(v): if hide_tick[i]: t.set_visible(False) + # loop over data dimensions and create text annotations + if annotate: + for i in range(len(x_ticks)): + for j in range(len(y_ticks)): + number = plot_data[i, j] + if np.isnan(number): + continue + text = axs[0].text(j, i, f"{round(number, 1) if number < -10 else round(number, 3)}", ha="center", va="center", color="white" if number > -2 else "black", fontsize="x-small") + # finalize the figure and save or display it fig.tight_layout() if save_figs: @@ -839,6 +849,7 @@ def norm_color_val(v): if style == "head2head": compare_at_relative_time = plot["comparison"]["relative_time"] comparison_unit = plot["comparison"]["unit"] + annotate = plot.get("annotate", True) # the comparison data will be a double nested dictionary of the strategy indices comparison_data_raw = self.get_head2head_comparison_data(aggregation_data, compare_at_relative_time, comparison_unit) @@ -949,12 +960,13 @@ def norm_color_val(v): raise NotImplementedError(f"Comparison unit '{comparison_unit}' not implemented") # loop over data dimensions and create text annotations - for i in range(len(x_ticks)): - for j in range(len(y_ticks)): - number = comparison_data[i, j] - if np.isnan(number): - continue - text = ax.text(j, i, f"{round(number, 1) if number < 100 else round(number)}%", ha="center", va="center", color="white" 
if number > 200 else "black", fontsize="small") + if annotate: + for i in range(len(x_ticks)): + for j in range(len(y_ticks)): + number = comparison_data[i, j] + if np.isnan(number): + continue + text = ax.text(j, i, f"{round(number, 1) if number < 100 else round(number)}%", ha="center", va="center", color="white" if (number > 200 or number < 50) else "black", fontsize="small") # plot the averages per strategy as labels under the heatmap averages = np.nanmean(comparison_data, axis=0) From fa9a556cd0ca3ac1ae61e9d6cbb3400b00a8cff4 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sun, 29 Jun 2025 14:43:40 +0200 Subject: [PATCH 201/234] Various minor improvements to plotting --- .../visualize_experiments.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 706438f..ebcd10e 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -709,7 +709,7 @@ def norm_color_val(v): cbar.set_ticks(np.linspace(cmin, cmax, num=cnum)) # set colorbar limits cbar.ax.set_ylim(cmin, cmax) # adjust visible colorbar limits # cbar.set_label("Performance relative to baseline (0.0) and optimum (1.0)") - cbar.set_label("Performance score") + cbar.ax.set_ylabel("Performance score", rotation=-90, va="bottom") # keep only non-overlapping ticks max_ticks = 15 @@ -726,14 +726,16 @@ def norm_color_val(v): if hide_tick[i]: t.set_visible(False) - # loop over data dimensions and create text annotations + # loop over data dimensions and create text annotations if annotate: - for i in range(len(x_ticks)): - for j in range(len(y_ticks)): - number = plot_data[i, j] - if np.isnan(number): - continue - text = axs[0].text(j, i, f"{round(number, 1) if number < -10 else round(number, 3)}", ha="center", va="center", color="white" if number > -2 else "black", fontsize="x-small") + # replace with 
looping over plot_data instead + for i, j in np.ndindex(plot_data.shape): + # for i in range(len(x_ticks)): + # for j in range(len(y_ticks)): + number = plot_data[i, j] + if np.isnan(number): + continue + text = axs[0].text(j, i, f"{round(number, 2) if number < -10 else round(number, 3)}", ha="center", va="center", color="white" if (number > 0.5 or number < -2) else "black", fontsize="x-small") # finalize the figure and save or display it fig.tight_layout() @@ -953,9 +955,9 @@ def norm_color_val(v): cbar.set_ticks(np.linspace(cmin, cmax, num=cnum)) # set colorbar limits cbar.ax.set_ylim(cmin, cmax) # adjust visible colorbar limits if comparison_unit == "time": - cbar.ax.set_ylabel(f"Percentage difference in time to same objective value{chr(10) if len(y_ticks) < 8 else ' '}(lower is better)", rotation=-90, va="bottom") + cbar.ax.set_ylabel(f"Time difference to same objective value (lower is better)", rotation=-90, va="bottom") elif comparison_unit == "objective": - cbar.ax.set_ylabel(f"Percentage difference in objective value at same time{chr(10) if len(y_ticks) < 8 else ' '}(higher is better)", rotation=-90, va="bottom") + cbar.ax.set_ylabel(f"Objective value difference at same time (higher is better)", rotation=-90, va="bottom") else: raise NotImplementedError(f"Comparison unit '{comparison_unit}' not implemented") From 41fd57e28611fb2ff2aa3a7e1ebaa34382d23dd9 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sun, 29 Jun 2025 15:40:57 +0200 Subject: [PATCH 202/234] Improvement to annotation plotting in heatmaps --- .../visualize_experiments.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index ebcd10e..12f189c 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -726,16 +726,14 @@ def norm_color_val(v): if hide_tick[i]: t.set_visible(False) - # loop 
over data dimensions and create text annotations - if annotate: - # replace with looping over plot_data instead - for i, j in np.ndindex(plot_data.shape): - # for i in range(len(x_ticks)): - # for j in range(len(y_ticks)): - number = plot_data[i, j] - if np.isnan(number): - continue - text = axs[0].text(j, i, f"{round(number, 2) if number < -10 else round(number, 3)}", ha="center", va="center", color="white" if (number > 0.5 or number < -2) else "black", fontsize="x-small") + # loop over data dimensions and create text annotations + if annotate: + # replace with looping over plot_data instead + for i, j in np.ndindex(plot_data.shape): + number = plot_data[i, j] + if np.isnan(number): + continue + text = axs[0].text(j, i, f"{round(number, 2) if number < -10 else round(number, 3)}", ha="center", va="center", color="white" if (number > 0.5 or number < -2) else "black", fontsize="x-small") # finalize the figure and save or display it fig.tight_layout() From 0c5b8a4c859a1cb7d0f7bb11c6428d6bdf0036a4 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 30 Jun 2025 14:57:52 +0200 Subject: [PATCH 203/234] Improvements to font size --- src/autotuning_methodology/visualize_experiments.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 12f189c..5d73c93 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -733,7 +733,7 @@ def norm_color_val(v): number = plot_data[i, j] if np.isnan(number): continue - text = axs[0].text(j, i, f"{round(number, 2) if number < -10 else round(number, 3)}", ha="center", va="center", color="white" if (number > 0.5 or number < -2) else "black", fontsize="x-small") + text = axs[0].text(j, i, f"{round(number, 2) if number < -10 else round(number, 3)}", ha="center", va="center", color="white" if (number > 0.5 or number < -2) else "black", 
fontsize="small") # finalize the figure and save or display it fig.tight_layout() From 6a9a50a5a49bc104469b3b753fd43a5324241702 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 30 Jun 2025 20:26:24 +0200 Subject: [PATCH 204/234] Use end time for head2head if not found --- src/autotuning_methodology/visualize_experiments.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 5d73c93..72de55c 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -1391,8 +1391,9 @@ def get_head2head_comparison_data_searchspace( # given the performance at `compare_at_relative_time`, what is the index of the first time that strategy beta reaches at least the same performance? index_matching = np.argwhere(curve_beta <= performance_at_comparison_alpha) if minimization else np.argwhere(curve_beta >= performance_at_comparison_alpha) if index_matching.size == 0: - # if strategy beta never reaches the performance of strategy alpha, we cannot compare, instead we penalize it by taking 10x the last time value - time_at_comparison_beta = time_range_beta[-1] * 10 + # if strategy beta never reaches the performance of strategy alpha, we cannot compare, instead we take the time at the end so we know what the minimal performance gain is + time_at_comparison_beta = time_range_beta[-1] + # another alternative: take the last time * fraction of inverse (e.g. 
if GA-nc doesn’t find the objective of GA, take end-of-time * 1/([GA-to-GAnc]/100)) # inner_comparison_data[strategy_index_beta] = np.nan # continue else: From 8c2a4ba6d4e6314adfd919c80cdba2b178971cbd Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 3 Jul 2025 16:20:08 +0200 Subject: [PATCH 205/234] Switched to Numpy 2.0, code changes for compatibility --- pyproject.toml | 3 ++- src/autotuning_methodology/baseline.py | 2 +- src/autotuning_methodology/caching.py | 2 +- src/autotuning_methodology/curves.py | 22 +++++++++---------- .../searchspace_statistics.py | 3 +-- .../visualize_experiments.py | 8 +++---- .../unit/test_curves.py | 14 ++++++------ 7 files changed, 27 insertions(+), 27 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6e85b98..5af7e80 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ classifiers = [ "Programming Language :: Python :: 3.13" ] # https://pypi.org/classifiers/ dependencies = [ - "numpy (>=1.22.4,<2.0.0)", + "numpy (>=2.0.0)", "scipy >= 1.10.1", "scikit-learn >= 1.0.2", "matplotlib >= 3.7.1", @@ -81,6 +81,7 @@ select = [ "E", # pycodestyle "F", # pyflakes, "D", # pydocstyle, + "NPY201", # Numpy 2.0 compatibility ] [tool.ruff.lint.pydocstyle] convention = "google" diff --git a/src/autotuning_methodology/baseline.py b/src/autotuning_methodology/baseline.py index 5c7b33f..64494f3 100644 --- a/src/autotuning_methodology/baseline.py +++ b/src/autotuning_methodology/baseline.py @@ -219,7 +219,7 @@ def get_split_times_at_feval( # noqa: D102 # for each key, obtain the time at a feval objective_time_keys = searchspace_stats.objective_time_keys - split_time_per_feval = np.full((len(objective_time_keys), index_at_feval.shape[0]), np.NaN) + split_time_per_feval = np.full((len(objective_time_keys), index_at_feval.shape[0]), np.nan) for key_index, key in enumerate(objective_time_keys): split_time_per_feval[key_index] = searchspace_stats.objective_times_array[key_index, index_at_feval] diff --git 
a/src/autotuning_methodology/caching.py b/src/autotuning_methodology/caching.py index 75dcb18..493235f 100755 --- a/src/autotuning_methodology/caching.py +++ b/src/autotuning_methodology/caching.py @@ -144,7 +144,7 @@ def __get_cache_full_filepath(self) -> Path: def __check_for_file(self) -> bool: """Check whether the file exists.""" full_filepath = self.__get_cache_full_filepath() - self.__stored = full_filepath.exists() and np.DataSource().exists(full_filepath) + self.__stored = full_filepath.exists() and np.lib.npyio.DataSource().exists(full_filepath) return self.__stored def __write_to_file(self, arrays: dict): diff --git a/src/autotuning_methodology/curves.py b/src/autotuning_methodology/curves.py index c3be141..71ded58 100644 --- a/src/autotuning_methodology/curves.py +++ b/src/autotuning_methodology/curves.py @@ -45,11 +45,11 @@ def get_indices_in_distribution( # check whether each value of draws (excluding NaN) is in dist if not skip_draws_check: assert np.all( - np.in1d(draws[~np.isnan(draws)], dist) + np.isin(draws[~np.isnan(draws)], dist) ), f""" Each value in draws should be in dist, - but {np.size(draws[~np.isnan(draws)][~np.in1d(draws[~np.isnan(draws)], dist)])} values - of the {np.size(draws)} are missing: {draws[~np.isnan(draws)][~np.in1d(draws[~np.isnan(draws)], dist)]}""" + but {np.size(draws[~np.isnan(draws)][~np.isin(draws[~np.isnan(draws)], dist)])} values + of the {np.size(draws)} are missing: {draws[~np.isnan(draws)][~np.isin(draws[~np.isnan(draws)], dist)]}""" # check the sorter if sorter is not None: @@ -60,8 +60,8 @@ def get_indices_in_distribution( assert indices_found.shape == draws.shape, "The shape of the indices must match the shape of the draws" # if indices found are outside the array, make them NaN - indices_found[indices_found < 0] = np.NaN - indices_found[indices_found >= len(dist)] = np.NaN + indices_found[indices_found < 0] = np.nan + indices_found[indices_found >= len(dist)] = np.nan return indices_found @@ -87,7 +87,7 @@ def 
get_indices_in_array(values: np.ndarray, array: np.ndarray) -> np.ndarray: # replace the indices found with the original, unsorted indices of array nan_mask = ~np.isnan(indices_found) - indices_found_unsorted = np.full_like(indices_found, fill_value=np.NaN) + indices_found_unsorted = np.full_like(indices_found, fill_value=np.nan) indices_found_unsorted[nan_mask] = array_sorter[indices_found[nan_mask].astype(int)] return indices_found_unsorted @@ -882,7 +882,7 @@ def get_split_times_at_feval( # noqa: D102 objective_time_keys = searchspace_stats.objective_time_keys num_keys = len(objective_time_keys) num_repeats = matching_indices_mask.shape[1] - masked_time_per_key = np.full((num_keys, matching_indices_mask.shape[0], num_repeats), np.NaN) + masked_time_per_key = np.full((num_keys, matching_indices_mask.shape[0], num_repeats), np.nan) # for each key, apply the boolean mask for key_index in range(num_keys): @@ -891,13 +891,13 @@ def get_split_times_at_feval( # noqa: D102 ] # remove where every repeat has NaN - time_in_range_per_key = np.full((num_keys, fevals_range.shape[0], num_repeats), np.NaN) + time_in_range_per_key = np.full((num_keys, fevals_range.shape[0], num_repeats), np.nan) for key_index in range(num_keys): all_nan_mask = ~np.all(np.isnan(masked_time_per_key[key_index]), axis=1) time_in_range_per_key[key_index] = masked_time_per_key[key_index][all_nan_mask] # get the median time per key at each repeat - split_time_per_feval = np.full((num_keys, fevals_range.shape[0]), np.NaN) + split_time_per_feval = np.full((num_keys, fevals_range.shape[0]), np.nan) for key_index in range(num_keys): split_time_per_feval[key_index] = np.mean(time_in_range_per_key[key_index], axis=1) assert split_time_per_feval.shape == ( @@ -916,7 +916,7 @@ def get_split_times_at_time( # noqa: D102 # for each key, interpolate the split times to the time range num_keys = len(searchspace_stats.objective_time_keys) - split_time_per_timestamp = np.full((num_keys, time_range.shape[0]), np.NaN) + 
split_time_per_timestamp = np.full((num_keys, time_range.shape[0]), np.nan) for key_index in range(num_keys): # remove NaN times_split_key = times_split[key_index] @@ -983,7 +983,7 @@ def _get_prediction_interval_separated( num_repeats = values.shape[1] # predict an isotonic curve for the time range for each run - predictions = np.full((num_repeats, time_range.shape[0]), fill_value=np.NaN) + predictions = np.full((num_repeats, time_range.shape[0]), fill_value=np.nan) for run in range(num_repeats): # get the data of this run _x = times[:, run] diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index fa014df..4633219 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -2,7 +2,6 @@ from __future__ import annotations # for correct nested type hints e.g. list[str], tuple[dict, str] -import json from math import ceil, floor from pathlib import Path from warnings import warn @@ -11,7 +10,7 @@ import numpy as np from autotuning_methodology.formats_interface import load_T4_format -from autotuning_methodology.validators import is_invalid_objective_performance, is_invalid_objective_time, validate_T4 +from autotuning_methodology.validators import is_invalid_objective_performance, is_invalid_objective_time def nansumwrapper(array: np.ndarray, **kwargs) -> np.ndarray: diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 72de55c..81b2d80 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -953,9 +953,9 @@ def norm_color_val(v): cbar.set_ticks(np.linspace(cmin, cmax, num=cnum)) # set colorbar limits cbar.ax.set_ylim(cmin, cmax) # adjust visible colorbar limits if comparison_unit == "time": - cbar.ax.set_ylabel(f"Time difference to same objective value (lower is better)", rotation=-90, 
va="bottom") + cbar.ax.set_ylabel("Time difference to same objective value (lower is better)", rotation=-90, va="bottom") elif comparison_unit == "objective": - cbar.ax.set_ylabel(f"Objective value difference at same time (higher is better)", rotation=-90, va="bottom") + cbar.ax.set_ylabel("Objective value difference at same time (higher is better)", rotation=-90, va="bottom") else: raise NotImplementedError(f"Comparison unit '{comparison_unit}' not implemented") @@ -1232,7 +1232,7 @@ def plot_split_times_bar_comparison( for _ in range((len(strategies_curves) - len(self.plot_skip_strategies)) + 1) ) for objective_time_key in objective_time_keys: - data_dict[objective_time_key] = np.full((len(strategies_curves)), np.NaN) + data_dict[objective_time_key] = np.full((len(strategies_curves)), np.nan) for strategy_index, strategy_curve in enumerate(strategies_curves): if strategy_curve.name in self.plot_skip_strategies: continue @@ -1700,7 +1700,7 @@ def plot_strategies_aggregated( raise ValueError(f"Invalid {tmin=}, must be between 0.0 and 1.0 or 'real'") # adjust the xlabel if necessary - if tmin == "real" and not "xlabel" in plot_settings: + if tmin == "real" and "xlabel" not in plot_settings: xlabel = "Relative time until the last strategy stopped" # plot each strategy diff --git a/tests/autotuning_methodology/unit/test_curves.py b/tests/autotuning_methodology/unit/test_curves.py index bc0b4fe..0fcd7cb 100644 --- a/tests/autotuning_methodology/unit/test_curves.py +++ b/tests/autotuning_methodology/unit/test_curves.py @@ -8,9 +8,9 @@ def test_get_indices_in_distribution(): """Each draw should have the same value as the associated value in the distribution.""" - draws = np.array([[4, np.NaN, 5], [1, 2, 4.5]]) + draws = np.array([[4, np.nan, 5], [1, 2, 4.5]]) dist = np.array([1, 2, 4, 4, 4.5, 5]) - expected_indices = np.array([[2, np.NaN, 5], [0, 1, 4]]) + expected_indices = np.array([[2, np.nan, 5], [0, 1, 4]]) indices_found = get_indices_in_distribution(draws=draws, 
dist=dist) @@ -31,16 +31,16 @@ def test_get_indices_in_distribution(): def test_get_indices_in_distribution_check_dist(): """Dist order should be checked by default and dist should not contain NaN.""" - draws = np.array([[4, np.NaN, 5], [1, 2, 4.5]]) + draws = np.array([[4, np.nan, 5], [1, 2, 4.5]]) with pytest.raises(AssertionError, match="2 violations in 5 values"): - get_indices_in_distribution(draws=draws, dist=np.array([1, 2, np.NaN, 4, 4.5])) + get_indices_in_distribution(draws=draws, dist=np.array([1, 2, np.nan, 4, 4.5])) with pytest.raises(AssertionError, match="1 violations in 4 values"): get_indices_in_distribution(draws=draws, dist=np.array([5, 4, 6, 7])) def test_get_indices_in_distribution_check_draws(): """Values in draw (with the exception of NaN) that are not in dist should throw an exception.""" - draws = np.array([[4, np.NaN, 3], [1, 2, 4.5]]) + draws = np.array([[4, np.nan, 3], [1, 2, 4.5]]) dist = np.array([1, 2, 4, 4, 4.5, 5]) with pytest.raises(AssertionError, match="Each value in draws should be in dist"): get_indices_in_distribution(draws=draws, dist=dist) @@ -48,9 +48,9 @@ def test_get_indices_in_distribution_check_draws(): def test_get_indices_in_array(): """Each value should have the same value as the associated value in the unsorted array.""" - draws = np.array([[4, np.NaN, 5], [1, 2, 4.5]]) + draws = np.array([[4, np.nan, 5], [1, 2, 4.5]]) dist = np.array([4, 2, 1, 4, 5, 4.5]) - expected_indices = np.array([[0, np.NaN, 4], [2, 1, 5]]) + expected_indices = np.array([[0, np.nan, 4], [2, 1, 5]]) indices_found = get_indices_in_array(values=draws, array=dist) From 101d283188be7e6a3dc22542ace7b46e8b5d4837 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 3 Jul 2025 16:26:12 +0200 Subject: [PATCH 206/234] Applied Ruff formatting --- src/autotuning_methodology/baseline.py | 12 +- src/autotuning_methodology/caching.py | 12 +- src/autotuning_methodology/curves.py | 97 ++++++---- src/autotuning_methodology/experiments.py | 10 +- 
.../formats_interface.py | 3 +- .../report_experiments.py | 8 +- src/autotuning_methodology/runner.py | 24 ++- .../searchspace_statistics.py | 40 ++-- .../visualize_experiments.py | 182 ++++++++++++------ 9 files changed, 254 insertions(+), 134 deletions(-) diff --git a/src/autotuning_methodology/baseline.py b/src/autotuning_methodology/baseline.py index 64494f3..52a0f37 100644 --- a/src/autotuning_methodology/baseline.py +++ b/src/autotuning_methodology/baseline.py @@ -184,7 +184,9 @@ def get_curve_over_fevals( # noqa: D102 return self._get_random_curve_means(fevals_range) return self._get_random_curve(fevals_range) - def get_curve_over_time(self, time_range: np.ndarray, dist=None, confidence_level=None, return_split=False) -> np.ndarray: # noqa: D102 + def get_curve_over_time( + self, time_range: np.ndarray, dist=None, confidence_level=None, return_split=False + ) -> np.ndarray: # noqa: D102 fevals_range = self.time_to_fevals(time_range) curve_over_time = self.get_curve_over_fevals(fevals_range, dist, confidence_level) smoothing_factor = 0.0 @@ -337,7 +339,9 @@ def get_curve_over_fevals( # noqa: D102 assert self.y_array.ndim == 1 return self.y_array[fevals_range] - def get_curve_over_time(self, time_range: np.ndarray, dist=None, confidence_level=None, return_split=False) -> np.ndarray: # noqa: D102 + def get_curve_over_time( + self, time_range: np.ndarray, dist=None, confidence_level=None, return_split=False + ) -> np.ndarray: # noqa: D102 predicted_y_values = self._ir.predict(time_range) if not self.use_index: return predicted_y_values @@ -427,7 +431,9 @@ def get_curve_over_fevals( # noqa: D102 ) return self.stochastic_curve_to_deterministic(range=fevals_range, curve=stochastic_curve) - def get_curve_over_time(self, time_range: np.ndarray, dist=None, confidence_level=None, return_split=False) -> np.ndarray: # noqa: D102 + def get_curve_over_time( + self, time_range: np.ndarray, dist=None, confidence_level=None, return_split=False + ) -> np.ndarray: # noqa: D102 
if dist is None: dist = self.searchspace_stats.objective_performances_total_sorted if confidence_level is None: diff --git a/src/autotuning_methodology/caching.py b/src/autotuning_methodology/caching.py index 493235f..43556ec 100755 --- a/src/autotuning_methodology/caching.py +++ b/src/autotuning_methodology/caching.py @@ -121,12 +121,16 @@ def is_same_as(self, other: ResultsDescription) -> bool: # check if same value for each key for attribute_key, attribute_value in self.__get_as_dict().items(): - if attribute_key == "group_display_name" or attribute_key == "visualization_caches_path" or attribute_key == "run_folder": + if ( + attribute_key == "group_display_name" + or attribute_key == "visualization_caches_path" + or attribute_key == "run_folder" + ): continue else: - assert ( - attribute_value == other.__get_as_dict()[attribute_key] - ), f"{attribute_key} has different values: {attribute_value} != {other.__get_as_dict()[attribute_key]}" + assert attribute_value == other.__get_as_dict()[attribute_key], ( + f"{attribute_key} has different values: {attribute_value} != {other.__get_as_dict()[attribute_key]}" + ) return True diff --git a/src/autotuning_methodology/curves.py b/src/autotuning_methodology/curves.py index 71ded58..6094d3e 100644 --- a/src/autotuning_methodology/curves.py +++ b/src/autotuning_methodology/curves.py @@ -37,16 +37,12 @@ def get_indices_in_distribution( # check whether the distribution is correctly ordered if not skip_dist_check: strictly_ascending_sort = dist[:-1] <= dist[1:] - assert np.all( - strictly_ascending_sort - ), f"""Distribution is not sorted ascendingly, + assert np.all(strictly_ascending_sort), f"""Distribution is not sorted ascendingly, {np.count_nonzero(~strictly_ascending_sort)} violations in {len(dist)} values: {dist}""" # check whether each value of draws (excluding NaN) is in dist if not skip_draws_check: - assert np.all( - np.isin(draws[~np.isnan(draws)], dist) - ), f""" + assert np.all(np.isin(draws[~np.isnan(draws)], 
dist)), f""" Each value in draws should be in dist, but {np.size(draws[~np.isnan(draws)][~np.isin(draws[~np.isnan(draws)], dist)])} values of the {np.size(draws)} are missing: {draws[~np.isnan(draws)][~np.isin(draws[~np.isnan(draws)], dist)]}""" @@ -116,7 +112,14 @@ class CurveBasis(ABC): """Abstract object providing minimals for visualization and analysis. Implemented by ``Curve`` and ``Baseline``.""" @abstractmethod - def get_curve(self, range: np.ndarray, x_type: str, dist: np.ndarray = None, confidence_level: float = None, return_split: bool = True): + def get_curve( + self, + range: np.ndarray, + x_type: str, + dist: np.ndarray = None, + confidence_level: float = None, + return_split: bool = True, + ): """Get the curve over the specified range of time or function evaluations. Args: @@ -140,7 +143,13 @@ def get_curve(self, range: np.ndarray, x_type: str, dist: np.ndarray = None, con raise ValueError(f"x_type must be 'fevals' or 'time', is {x_type}") @abstractmethod - def get_curve_over_fevals(self, fevals_range: np.ndarray, dist: np.ndarray = None, confidence_level: float = None, return_split: bool = True): + def get_curve_over_fevals( + self, + fevals_range: np.ndarray, + dist: np.ndarray = None, + confidence_level: float = None, + return_split: bool = True, + ): """Get the curve over function evaluations. Args: @@ -157,7 +166,9 @@ def get_curve_over_fevals(self, fevals_range: np.ndarray, dist: np.ndarray = Non raise NotImplementedError @abstractmethod - def get_curve_over_time(self, time_range: np.ndarray, dist: np.ndarray = None, confidence_level: float = None, return_split: bool = True): + def get_curve_over_time( + self, time_range: np.ndarray, dist: np.ndarray = None, confidence_level: float = None, return_split: bool = True + ): """Get the curve over time. 
Args: @@ -510,9 +521,9 @@ def _check_curve_real_fictional_consistency( if x_axis_range_fictional.ndim > 0: # if there's a fictional part, ensure that all the expected data is in the combined real and fictional parts x_axis_range_combined = np.concatenate([x_axis_range_real, x_axis_range_fictional]) - assert ( - x_axis_range.shape == x_axis_range_combined.shape - ), f"The shapes of {x_axis_range.shape=} and {x_axis_range_combined.shape=} do not match" + assert x_axis_range.shape == x_axis_range_combined.shape, ( + f"The shapes of {x_axis_range.shape=} and {x_axis_range_combined.shape=} do not match" + ) assert np.array_equal( x_axis_range, np.concatenate([x_axis_range_real, x_axis_range_fictional]), equal_nan=True ) @@ -525,22 +536,27 @@ def _check_curve_real_fictional_consistency( ) else: # if there is no fictional part, ensure that all the expected data is in the real part - assert ( - x_axis_range.shape == x_axis_range_real.shape - ), f"The shapes of {x_axis_range.shape=} and {x_axis_range_real.shape=} do not match" - assert np.array_equal( - x_axis_range, x_axis_range_real, equal_nan=True - ), f"Unequal arrays: {x_axis_range}, {x_axis_range_real}" + assert x_axis_range.shape == x_axis_range_real.shape, ( + f"The shapes of {x_axis_range.shape=} and {x_axis_range_real.shape=} do not match" + ) + assert np.array_equal(x_axis_range, x_axis_range_real, equal_nan=True), ( + f"Unequal arrays: {x_axis_range}, {x_axis_range_real}" + ) assert np.array_equal(curve, curve_real, equal_nan=True), f"Unequal arrays: {curve}, {curve_real}" - assert np.array_equal( - curve_lower_err, curve_lower_err_real, equal_nan=True - ), f"Unequal arrays: {curve_lower_err}, {curve_lower_err_real}" - assert np.array_equal( - curve_upper_err, curve_upper_err_real, equal_nan=True - ), f"Unequal arrays: {curve_upper_err}, {curve_upper_err_real}" + assert np.array_equal(curve_lower_err, curve_lower_err_real, equal_nan=True), ( + f"Unequal arrays: {curve_lower_err}, {curve_lower_err_real}" + ) + 
assert np.array_equal(curve_upper_err, curve_upper_err_real, equal_nan=True), ( + f"Unequal arrays: {curve_upper_err}, {curve_upper_err_real}" + ) def get_curve( # noqa: D102 - self, range: np.ndarray, x_type: str, dist: np.ndarray = None, confidence_level: float = None, return_split: bool = True + self, + range: np.ndarray, + x_type: str, + dist: np.ndarray = None, + confidence_level: float = None, + return_split: bool = True, ): return super().get_curve(range, x_type, dist, confidence_level, return_split=return_split) @@ -600,9 +616,9 @@ def _get_curve_over_fevals_values_in_range(self, fevals_range: np.ndarray) -> tu masked_fevals[:, greatest_common_non_NaN_index + 1 :] = np.nan # check that the filtered fevals are consistent - assert np.allclose( - masked_fevals, masked_fevals[0], equal_nan=True - ), "Every repeat must have the same array of function evaluations" + assert np.allclose(masked_fevals, masked_fevals[0], equal_nan=True), ( + "Every repeat must have the same array of function evaluations" + ) # as every repeat has the same array of fevals, check whether they match the range fevals = masked_fevals[ @@ -618,7 +634,11 @@ def _get_curve_over_fevals_values_in_range(self, fevals_range: np.ndarray) -> tu return fevals, masked_values def get_curve_over_fevals( # noqa: D102 - self, fevals_range: np.ndarray, dist: np.ndarray = None, confidence_level: float = None, return_split: bool = True + self, + fevals_range: np.ndarray, + dist: np.ndarray = None, + confidence_level: float = None, + return_split: bool = True, ): fevals, masked_values = self._get_curve_over_fevals_values_in_range(fevals_range) @@ -779,7 +799,12 @@ def _get_curve_over_time_values_in_range( return times, values, real_stopping_point_time, num_fevals, num_repeats def get_curve_over_time( # noqa: D102 - self, time_range: np.ndarray, dist: np.ndarray = None, confidence_level: float = None, return_split: bool = True, use_bagging=True + self, + time_range: np.ndarray, + dist: np.ndarray = None, + 
confidence_level: float = None, + return_split: bool = True, + use_bagging=True, ): # check the distribution if dist is None: @@ -839,9 +864,9 @@ def get_curve_over_time( # noqa: D102 indices_curve = prediction_interval[:, 2] curve = dist[indices_curve] curve_lower_err, curve_upper_err = dist[prediction_interval[:, 0]], dist[prediction_interval[:, 1]] - assert ( - curve_lower_err.shape == curve_upper_err.shape == curve.shape - ), f"{curve_lower_err.shape=} != {curve_upper_err.shape=} != {curve.shape=}" + assert curve_lower_err.shape == curve_upper_err.shape == curve.shape, ( + f"{curve_lower_err.shape=} != {curve_upper_err.shape=} != {curve.shape=}" + ) # print(f"{self.display_name}: {np.median(curve - curve_lower_err)}, {np.median(curve_upper_err - curve)}") # for t, e, i in zip(time_range, curve_lower_err, prediction_interval[:, 0]): @@ -1012,9 +1037,9 @@ def _get_prediction_interval_separated( predictions = predictions.transpose() # set to (time_range, num_repeats) y_lower_err, y_upper_err = self.get_confidence_interval(predictions, confidence_level=confidence_level) mean_prediction = np.median(predictions, axis=1) - assert ( - y_lower_err.shape == y_upper_err.shape == mean_prediction.shape == time_range.shape - ), f"{y_lower_err.shape=} != {y_upper_err.shape=} != {mean_prediction.shape=} != {time_range.shape=}" + assert y_lower_err.shape == y_upper_err.shape == mean_prediction.shape == time_range.shape, ( + f"{y_lower_err.shape=} != {y_upper_err.shape=} != {mean_prediction.shape=} != {time_range.shape=}" + ) # combine the data and return as a prediction interval prediction_interval = np.concatenate([y_lower_err, y_upper_err, mean_prediction]).reshape((3, -1)).transpose() diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index b5c3938..6756a25 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -210,13 +210,15 @@ def generate_all_experimental_groups( gpu, 
application["name"], ) - + # get the objective performance keys if "objective_performance_keys" in application: group["objective_performance_keys"] = application["objective_performance_keys"] else: # load the full search space file and derive the objective performance keys - print(f"Loading full search space file {group['full_search_space_file']} to infer the objective performance keys. Consider setting them explicititely in the experiments file.") + print( + f"Loading full search space file {group['full_search_space_file']} to infer the objective performance keys. Consider setting them explicititely in the experiments file." + ) data = load_T4_format(group["full_search_space_file"], validate=True) objectives = data["results"][0]["objectives"] assert len(objectives) == 1, "Only one objective is supported for now" @@ -327,7 +329,9 @@ def calculate_budget(group: dict, statistics_settings: dict, searchspace_stats: # register in the group group["budget"] = {} group["cutoff_times"] = { - "cutoff_time_start": max(cutoff_point_start_time, 0.0) if statistics_settings["cutoff_percentile_start"] > 0.0 else 0.0, + "cutoff_time_start": max(cutoff_point_start_time, 0.0) + if statistics_settings["cutoff_percentile_start"] > 0.0 + else 0.0, "cutoff_time": cutoff_point_time * (1 + cutoff_margin), } diff --git a/src/autotuning_methodology/formats_interface.py b/src/autotuning_methodology/formats_interface.py index 571b544..5214529 100644 --- a/src/autotuning_methodology/formats_interface.py +++ b/src/autotuning_methodology/formats_interface.py @@ -4,6 +4,7 @@ from pathlib import Path from autotuning_methodology.validators import validate_T4 + def load_T4_format(filepath: Path, validate: True) -> dict: """Load and optionally validate a T4 format file.""" with open(filepath, "r", encoding="utf-8") as fh: @@ -24,4 +25,4 @@ def load_T4_format(filepath: Path, validate: True) -> dict: validate_T4(data) # return the T4 data - return data \ No newline at end of file + return data diff 
--git a/src/autotuning_methodology/report_experiments.py b/src/autotuning_methodology/report_experiments.py index b91c64b..1f4514d 100644 --- a/src/autotuning_methodology/report_experiments.py +++ b/src/autotuning_methodology/report_experiments.py @@ -71,7 +71,7 @@ def get_aggregation_data( results_description = results_descriptions[gpu_name][application_name][strategy["name"]] if results_description is None: raise ValueError( - f"""Strategy {strategy['display_name']} not in results_description, + f"""Strategy {strategy["display_name"]} not in results_description, make sure execute_experiment() has ran first""" ) curve = StochasticOptimizationAlgorithm(results_description) @@ -229,9 +229,9 @@ def get_agg_data(): if "Not enough overlap in time range and time values" in str(e.args[0]): # delete the broken cachefile _, strategy_name, application_name, device_name = e.args - assert results_descriptions[device_name][application_name][ - strategy_name - ].delete(), "Failed to delete cachefile" + assert results_descriptions[device_name][application_name][strategy_name].delete(), ( + "Failed to delete cachefile" + ) # re-execute the experiment and recollect the data to see if the issue is resolved experiment, strategies, searchspace_statistics, results_descriptions = execute_experiment( diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 0689370..b560818 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -163,7 +163,9 @@ def tune_with_kerneltuner(): f"Much fewer configurations were returned ({num_results}) than the requested {max_fevals}" ) if num_results < 2 and group["budget"]["max_fevals"] > 2: - raise ValueError(f"Less than two configurations were returned ({len(results['results'])}, budget {group['budget']}) \n") + raise ValueError( + f"Less than two configurations were returned ({len(results['results'])}, budget {group['budget']}) \n" + ) return metadata, results def 
tune_with_BAT(): @@ -261,13 +263,13 @@ def collect_results( def report_multiple_attempts(rep: int, len_res: int, group_repeats: int, attempt: int): """If multiple attempts are necessary, report the reason.""" if len_res < 1: - print(f"({rep+1}/{group_repeats}) No results found, trying once more...") + print(f"({rep + 1}/{group_repeats}) No results found, trying once more...") elif len_res < min_num_evals: print( f"Too few results found ({len_res} of {min_num_evals} required, attempt {attempt}), trying once more..." ) else: - print(f"({rep+1}/{group_repeats}) Only invalid results found, trying once more...") + print(f"({rep + 1}/{group_repeats}) Only invalid results found, trying once more...") def cumulative_time_taken(results: list) -> list: """Calculates the cumulative time taken for each of the configurations in results.""" @@ -310,7 +312,9 @@ def cumulative_time_taken(results: list) -> list: if attempt > 0: report_multiple_attempts(rep, len_res, group["repeats"], attempt) if attempt >= 20: - raise RuntimeError(f"Could not find enough results for {results_description.application_name} on {results_description.device_name} in {attempt} attempts ({'only invalid, ' if only_invalid else ''}{len_res}/{min_num_evals}), quiting...") + raise RuntimeError( + f"Could not find enough results for {results_description.application_name} on {results_description.device_name} in {attempt} attempts ({'only invalid, ' if only_invalid else ''}{len_res}/{min_num_evals}), quiting..." 
+ ) _, results, total_time_ms = tune( input_file, results_description.application_name, @@ -406,9 +410,9 @@ def get_nan_array() -> np.ndarray: objective_times_list = [] for key_index, key in enumerate(objective_time_keys): evaluation_times = evaluation["times"] - assert ( - key in evaluation_times - ), f"Objective time key {key} not in evaluation['times'] ({evaluation_times})" + assert key in evaluation_times, ( + f"Objective time key {key} not in evaluation['times'] ({evaluation_times})" + ) if isinstance(evaluation_times[key], list): # this happens when runtimes are in objective_time_keys value = sum(evaluation_times[key]) @@ -430,9 +434,9 @@ def get_nan_array() -> np.ndarray: for key_index, key in enumerate(objective_performance_keys): evaluation_measurements = evaluation["measurements"] measurements = list(filter(lambda m: m["name"] == key, evaluation_measurements)) - assert ( - len(measurements) > 0 - ), f"Objective performance key name {key} not in evaluation['measurements'] ({evaluation_measurements})" + assert len(measurements) > 0, ( + f"Objective performance key name {key} not in evaluation['measurements'] ({evaluation_measurements})" + ) assert len(measurements) == 1, f"""Objective performance key name {key} multiply defined in evaluation['measurements'] ({evaluation_measurements})""" value = measurements[0]["value"] diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index 4633219..4fa50b6 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -264,7 +264,11 @@ def cutoff_point(self, cutoff_percentile: float) -> tuple[float, int]: Returns: A tuple of the objective value at the cutoff point and the fevals to the cutoff point. 
""" - inverted_sorted_performance_arr = self.objective_performances_total_sorted[::-1] if self.minimization else self.objective_performances_total_sorted + inverted_sorted_performance_arr = ( + self.objective_performances_total_sorted[::-1] + if self.minimization + else self.objective_performances_total_sorted + ) N = inverted_sorted_performance_arr.shape[0] # get the objective performance at the cutoff point @@ -295,11 +299,15 @@ def cutoff_point(self, cutoff_percentile: float) -> tuple[float, int]: # i = next(x[0] for x in enumerate(inverted_sorted_performance_arr) if x[1] > cutoff_percentile * arr[-1]) if self.minimization: i = next( - x[0] for x in enumerate(inverted_sorted_performance_arr) if x[1] <= objective_performance_at_cutoff_point + x[0] + for x in enumerate(inverted_sorted_performance_arr) + if x[1] <= objective_performance_at_cutoff_point ) else: i = next( - x[0] for x in enumerate(inverted_sorted_performance_arr) if x[1] >= objective_performance_at_cutoff_point + x[0] + for x in enumerate(inverted_sorted_performance_arr) + if x[1] >= objective_performance_at_cutoff_point ) if cutoff_percentile != 1.0 and inverted_sorted_performance_arr[i] == self.total_performance_absolute_optimum(): if i == 0: @@ -371,7 +379,9 @@ def cutoff_point_fevals_time_start_end( raise ValueError("Cutoff point start and end are the same") # get the times - cutoff_point_time_start = self.cutoff_point_time_from_fevals(cutoff_point_fevals_start if cutoff_percentile_start > 0.0 else 0) + cutoff_point_time_start = self.cutoff_point_time_from_fevals( + cutoff_point_fevals_start if cutoff_percentile_start > 0.0 else 0 + ) cutoff_point_time_end = self.cutoff_point_time_from_fevals(cutoff_point_fevals_end) # return the values @@ -413,12 +423,12 @@ def _load(self) -> bool: self.objective_times = dict() for key in self.objective_time_keys: self.objective_times[key] = to_valid_array(results, key, performance=False, from_time_unit=timeunit) - assert ( - self.objective_times[key].ndim == 1 
- ), f"Should have one dimension, has {self.objective_times[key].ndim}" - assert ( - self.objective_times[key].shape[0] == self.size - ), f"Should have the same size as results ({self.size}), has {self.objective_times[key].shape[0]}" + assert self.objective_times[key].ndim == 1, ( + f"Should have one dimension, has {self.objective_times[key].ndim}" + ) + assert self.objective_times[key].shape[0] == self.size, ( + f"Should have the same size as results ({self.size}), has {self.objective_times[key].shape[0]}" + ) assert not np.all(np.isnan(self.objective_times[key])), f"""All values for {key=} are NaN. Likely the experiment did not collect time values for objective_time_keys '{key}'.""" @@ -431,9 +441,9 @@ def _load(self) -> bool: performance=True, replace_missing_measurement_from_times_key="runtimes" if key == "time" else None, ) - assert ( - self.objective_performances[key].ndim == 1 - ), f"Should have one dimension, has {self.objective_performances[key].ndim}" + assert self.objective_performances[key].ndim == 1, ( + f"Should have one dimension, has {self.objective_performances[key].ndim}" + ) assert ( self.objective_performances[key].shape[0] == self.size ), f"""Should have the same size as results ({self.size}), @@ -473,9 +483,7 @@ def _load(self) -> bool: {np.nansum(self.objective_performances_array[:, 0])} vs. 
{self.objective_performances_total[0]}""" # sort - self.objective_times_total_sorted = np.sort( - self.objective_times_total[~np.isnan(self.objective_times_total)] - ) + self.objective_times_total_sorted = np.sort(self.objective_times_total[~np.isnan(self.objective_times_total)]) self.objective_times_number_of_nan = ( self.objective_times_total.shape[0] - self.objective_times_total_sorted.shape[0] ) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 81b2d80..df97845 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -44,6 +44,7 @@ def lighten_color(color, amount: float = 0.5): r, g, b = to_rgb(color) return to_hex([(1 - amount) * c + amount for c in (r, g, b)]) + def get_colors(strategies: list[dict]) -> list: """Assign colors using the tab10 colormap, with lighter shades for children.""" tab10 = plt.get_cmap("tab10").colors @@ -70,8 +71,7 @@ def get_colors(strategies: list[dict]) -> list: raise ValueError(f"Color parent '{name}' has more than two children") base_color = tab10[color_index] parent_colors[name] = { - idx: lighten_color(base_color, amount=0.4 + 0.3 * j) - for j, idx in enumerate(children_indices) + idx: lighten_color(base_color, amount=0.4 + 0.3 * j) for j, idx in enumerate(children_indices) } colors[i] = to_hex(base_color) color_index += 1 @@ -475,9 +475,9 @@ def norm_color_val(v): for strategy in self.strategies: strategy_name = strategy["name"] strategy_displayname = strategy["display_name"] - assert ( - sum([1 for s in self.strategies if s["name"] == strategy_name]) == 1 - ), f"Strategy name '{strategy_name}' is not unqiue" + assert sum([1 for s in self.strategies if s["name"] == strategy_name]) == 1, ( + f"Strategy name '{strategy_name}' is not unqiue" + ) # get the data from the collected aggregated data for gpu_name in self.experiment["experimental_groups_defaults"]["gpus"]: @@ -515,9 +515,9 @@ def 
norm_color_val(v): if combine else x_axis_range_real ) - assert np.array_equal( - time_range, x_axis_range, equal_nan=True - ), "time_range != x_axis_range" + assert np.array_equal(time_range, x_axis_range, equal_nan=True), ( + "time_range != x_axis_range" + ) curve = np.concatenate([curve_real, curve_fictional]) if combine else curve_real # get the standardised curves and write them to the collector curve: np.ndarray = random_baseline.get_standardised_curves( @@ -569,7 +569,7 @@ def norm_color_val(v): ), "time": ( np.round(np.linspace(0.0, 1.0, bins), 2), - f"Fraction of time between {cutoff_percentile_start*100}% and {cutoff_percentile*100}%", + f"Fraction of time between {cutoff_percentile_start * 100}% and {cutoff_percentile * 100}%", ), } x_ticks = label_data[x_type][0] @@ -597,13 +597,15 @@ def norm_color_val(v): ) # validate the data is within the vmin-vmax range and visible colorbar range - assert not (plot_data > 1.0).any(), "Plot data contains values greater than 1.0, which should not be possible. Please investigate." + assert not (plot_data > 1.0).any(), ( + "Plot data contains values greater than 1.0, which should not be possible. Please investigate." 
+ ) if cap_to_vmin: plot_data = np.clip(plot_data, vmin, 1.0) outside_range = np.where(np.logical_or(plot_data < vmin, plot_data > vmax)) - assert ( - len(outside_range[0]) == 0 and len(outside_range[1]) == 0 - ), f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range} for strategy {strategy_displayname})" + assert len(outside_range[0]) == 0 and len(outside_range[1]) == 0, ( + f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range} for strategy {strategy_displayname})" + ) outside_visible_range = np.where(np.logical_or(plot_data < cmin, plot_data > cmax)) if not (len(outside_visible_range[0]) == 0 and len(outside_visible_range[1]) == 0): warnings.warn( @@ -733,7 +735,15 @@ def norm_color_val(v): number = plot_data[i, j] if np.isnan(number): continue - text = axs[0].text(j, i, f"{round(number, 2) if number < -10 else round(number, 3)}", ha="center", va="center", color="white" if (number > 0.5 or number < -2) else "black", fontsize="small") + text = axs[0].text( + j, + i, + f"{round(number, 2) if number < -10 else round(number, 3)}", + ha="center", + va="center", + color="white" if (number > 0.5 or number < -2) else "black", + fontsize="small", + ) # finalize the figure and save or display it fig.tight_layout() @@ -804,7 +814,7 @@ def norm_color_val(v): ), "time": ( np.round(np.linspace(0.0, 1.0, bins), 2), - f"Fraction of time between {cutoff_percentile_start*100}% and {cutoff_percentile*100}%", + f"Fraction of time between {cutoff_percentile_start * 100}% and {cutoff_percentile * 100}%", ), } x_ticks = label_data[x_type][0] @@ -830,9 +840,9 @@ def norm_color_val(v): # validate the data outside_range = np.where(np.logical_or(plot_data < vmin, plot_data > vmax)) - assert ( - len(outside_range[0]) == 0 and len(outside_range[1]) == 0 - ), f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range} for strategy {strategy_displayname})" + 
assert len(outside_range[0]) == 0 and len(outside_range[1]) == 0, ( + f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range} for strategy {strategy_displayname})" + ) else: raise NotImplementedError(f"Invalid {style=}") @@ -852,7 +862,9 @@ def norm_color_val(v): annotate = plot.get("annotate", True) # the comparison data will be a double nested dictionary of the strategy indices - comparison_data_raw = self.get_head2head_comparison_data(aggregation_data, compare_at_relative_time, comparison_unit) + comparison_data_raw = self.get_head2head_comparison_data( + aggregation_data, compare_at_relative_time, comparison_unit + ) # if more than half of the comparisons between two strategies are NaN, set all to NaN for strategy1 in comparison_data_raw.keys(): @@ -863,8 +875,13 @@ def norm_color_val(v): # convert the comparison data dictionary to a 2D numpy array of means comparison_data = np.array( - [[np.nanmean(comparison_data_raw[strategy1][strategy2]) for strategy2 in comparison_data_raw[strategy1].keys()] - for strategy1 in comparison_data_raw.keys()] + [ + [ + np.nanmean(comparison_data_raw[strategy1][strategy2]) + for strategy2 in comparison_data_raw[strategy1].keys() + ] + for strategy1 in comparison_data_raw.keys() + ] ).transpose() # set up the plot @@ -883,7 +900,7 @@ def norm_color_val(v): # elif comparison_unit == "objective": # ax.set_xlabel("How much objective value do these strategies achieve...") # ax.set_ylabel("...relative to these strategies?") - # ax.xaxis.set_label_position('top') + # ax.xaxis.set_label_position('top') # set the x and y ticks x_ticks = list(comparison_data_raw.keys()) @@ -896,6 +913,7 @@ def norm_color_val(v): # set the color map vmin = 0.0 vmax = 1000.0 + def norm_color_val(v): """Normalize a color value to fit in the 0-1 range.""" return (v - vmin) / (vmax - vmin) @@ -926,7 +944,9 @@ def norm_color_val(v): # if there are any values above the vmax, warn if np.any(comparison_data > vmax): - 
warnings.warn(f"There are values above the vmax ({vmax}) in the comparison data: {comparison_data[comparison_data > vmax]}, these are clipped") + warnings.warn( + f"There are values above the vmax ({vmax}) in the comparison data: {comparison_data[comparison_data > vmax]}, these are clipped" + ) # clip the comparison data to the vmin-vmax range comparison_data_clipped = np.clip(comparison_data, vmin, vmax) @@ -941,7 +961,7 @@ def norm_color_val(v): # set the colorbar # cmin = np.nanmin(comparison_data_clipped) - cmin = vmin # always show 0.0 as the start + cmin = vmin # always show 0.0 as the start max_val = np.nanmax(comparison_data_clipped) # round to the nearest 100 cmax = round(ceil(max_val), -2) @@ -953,9 +973,13 @@ def norm_color_val(v): cbar.set_ticks(np.linspace(cmin, cmax, num=cnum)) # set colorbar limits cbar.ax.set_ylim(cmin, cmax) # adjust visible colorbar limits if comparison_unit == "time": - cbar.ax.set_ylabel("Time difference to same objective value (lower is better)", rotation=-90, va="bottom") + cbar.ax.set_ylabel( + "Time difference to same objective value (lower is better)", rotation=-90, va="bottom" + ) elif comparison_unit == "objective": - cbar.ax.set_ylabel("Objective value difference at same time (higher is better)", rotation=-90, va="bottom") + cbar.ax.set_ylabel( + "Objective value difference at same time (higher is better)", rotation=-90, va="bottom" + ) else: raise NotImplementedError(f"Comparison unit '{comparison_unit}' not implemented") @@ -966,17 +990,33 @@ def norm_color_val(v): number = comparison_data[i, j] if np.isnan(number): continue - text = ax.text(j, i, f"{round(number, 1) if number < 100 else round(number)}%", ha="center", va="center", color="white" if (number > 200 or number < 50) else "black", fontsize="small") + text = ax.text( + j, + i, + f"{round(number, 1) if number < 100 else round(number)}%", + ha="center", + va="center", + color="white" if (number > 200 or number < 50) else "black", + fontsize="small", + ) # plot 
the averages per strategy as labels under the heatmap averages = np.nanmean(comparison_data, axis=0) # add "mean" before the averages - ax.text(-0.5, len(y_ticks)-0.2, "Mean:", ha="right", va="center", color="black", fontsize=10) + ax.text(-0.5, len(y_ticks) - 0.2, "Mean:", ha="right", va="center", color="black", fontsize=10) for i, avg in enumerate(averages): ax.text( - i, len(y_ticks)-0.2, f"{round(avg, 1) if avg < 100 else round(avg)}%", ha="center", va="center", color="black", fontsize="small" + i, + len(y_ticks) - 0.2, + f"{round(avg, 1) if avg < 100 else round(avg)}%", + ha="center", + va="center", + color="black", + fontsize="small", ) - print(f"Averages per strategy at {compare_at_relative_time} relative time: {[(s, a) for s, a in zip(x_ticks, averages)]}") + print( + f"Averages per strategy at {compare_at_relative_time} relative time: {[(s, a) for s, a in zip(x_ticks, averages)]}" + ) # finalize the figure and save or display it fig.tight_layout() @@ -995,20 +1035,23 @@ def norm_color_val(v): if not hasattr(axs, "__len__"): axs = [axs] title = f"""Aggregated Data\napplications: - {', '.join(self.experiment['experimental_groups_defaults']['applications_names'])}\nGPUs: {', '.join(self.experiment['experimental_groups_defaults']['gpus'])}""" + {", ".join(self.experiment["experimental_groups_defaults"]["applications_names"])}\nGPUs: {", ".join(self.experiment["experimental_groups_defaults"]["gpus"])}""" fig.canvas.manager.set_window_title(title) if not save_figs: fig.suptitle(title) # finalize the figure and save or display it lowest_real_y_value = self.plot_strategies_aggregated( - axs[0], aggregation_data, visualization_settings=self.experiment["visualization_settings"], plot_settings=plot + axs[0], + aggregation_data, + visualization_settings=self.experiment["visualization_settings"], + plot_settings=plot, ) if vmin is not None: if isinstance(vmin, (int, float)): axs[0].set_ylim(bottom=vmin) elif vmin == "real": - axs[0].set_ylim(bottom=lowest_real_y_value 
- (abs(lowest_real_y_value)+1.0) * 0.02) + axs[0].set_ylim(bottom=lowest_real_y_value - (abs(lowest_real_y_value) + 1.0) * 0.02) else: raise NotImplementedError(f"{vmin=} not implemented") fig.tight_layout() @@ -1221,9 +1264,9 @@ def plot_split_times_bar_comparison( strategy_labels = list() for print_skip_key in print_skip: - assert ( - print_skip_key in objective_time_keys - ), f"Each key in print_skip must be in objective_time_keys, {print_skip_key} is not ({objective_time_keys})" + assert print_skip_key in objective_time_keys, ( + f"Each key in print_skip must be in objective_time_keys, {print_skip_key} is not ({objective_time_keys})" + ) # get a dictionary of {time_key: [array_average_time_per_strategy]} data_dict = dict.fromkeys(objective_time_keys) @@ -1298,14 +1341,16 @@ def plot_split_times_bar_comparison( else: plt.show() - def get_head2head_comparison_data(self, aggregation_data: dict, compare_at_relative_time: float, comparison_unit: str) -> dict: + def get_head2head_comparison_data( + self, aggregation_data: dict, compare_at_relative_time: float, comparison_unit: str + ) -> dict: """Gets the data for a head-to-head comparison of strategies across all searchspaces.""" # the comparison data will be a double nested dictionary of the strategy indices comparison_data = dict() for strategy_alpha in self.strategies: - comparison_data[strategy_alpha['display_name']] = dict() + comparison_data[strategy_alpha["display_name"]] = dict() for strategy_beta in self.strategies: - comparison_data[strategy_alpha['display_name']][strategy_beta['display_name']] = list() + comparison_data[strategy_alpha["display_name"]][strategy_beta["display_name"]] = list() # iterate over the searchspaces and strategies to get head2head data for gpu_name in self.experiment["experimental_groups_defaults"]["gpus"]: @@ -1330,7 +1375,7 @@ def get_head2head_comparison_data(self, aggregation_data: dict, compare_at_relat # for this searchspace, append each strategy's data to the comparison data 
for strategy_index_alpha, strategy_alpha in enumerate(self.strategies): for strategy_index_beta, strategy_beta in enumerate(self.strategies): - comparison_data[strategy_alpha['display_name']][strategy_beta['display_name']].append( + comparison_data[strategy_alpha["display_name"]][strategy_beta["display_name"]].append( comparison_data_ss[strategy_index_alpha][strategy_index_beta] ) @@ -1360,13 +1405,15 @@ def get_head2head_comparison_data_searchspace( """ comparison_point = x_axis_range[-1] * compare_at_relative_time comparison_data = dict() - confidence_level = 0.95 # irrelevant because the confidence intervals are not used + confidence_level = 0.95 # irrelevant because the confidence intervals are not used minimization = searchspace_stats.minimization - dist = searchspace_stats.objective_performances_total_sorted + dist = searchspace_stats.objective_performances_total_sorted for strategy_index_alpha, strategy_alpha in enumerate(self.strategies): inner_comparison_data = dict() strategy_curve_alpha = strategies_curves[strategy_index_alpha] - _, time_range_alpha, curve_alpha, _, _ = strategy_curve_alpha.get_curve(x_axis_range, x_type, dist=dist, confidence_level=confidence_level, return_split=False) + _, time_range_alpha, curve_alpha, _, _ = strategy_curve_alpha.get_curve( + x_axis_range, x_type, dist=dist, confidence_level=confidence_level, return_split=False + ) # find the index of the closest time and performance to the comparison point closest_index_alpha = np.argmin(np.abs(time_range_alpha - comparison_point)) @@ -1384,12 +1431,18 @@ def get_head2head_comparison_data_searchspace( inner_comparison_data[strategy_index_beta] = np.nan continue strategy_curve_beta = strategies_curves[strategy_index_beta] - _, time_range_beta, curve_beta, _, _ = strategy_curve_beta.get_curve(x_axis_range, x_type, dist=dist, confidence_level=confidence_level, return_split=False) + _, time_range_beta, curve_beta, _, _ = strategy_curve_beta.get_curve( + x_axis_range, x_type, dist=dist, 
confidence_level=confidence_level, return_split=False + ) # calculate the relative difference between the two strategies at the comparison point if comparison_unit == "time": # given the performance at `compare_at_relative_time`, what is the index of the first time that strategy beta reaches at least the same performance? - index_matching = np.argwhere(curve_beta <= performance_at_comparison_alpha) if minimization else np.argwhere(curve_beta >= performance_at_comparison_alpha) + index_matching = ( + np.argwhere(curve_beta <= performance_at_comparison_alpha) + if minimization + else np.argwhere(curve_beta >= performance_at_comparison_alpha) + ) if index_matching.size == 0: # if strategy beta never reaches the performance of strategy alpha, we cannot compare, instead we take the time at the end so we know what the minimal performance gain is time_at_comparison_beta = time_range_beta[-1] @@ -1400,12 +1453,14 @@ def get_head2head_comparison_data_searchspace( # get the time at which strategy beta reaches the performance of strategy alpha closest_index_beta = index_matching[0][0] # take the first match time_at_comparison_beta = time_range_beta[closest_index_beta] - + # given the performance at `compare_at_relative_time`, how much longer does strategy beta take to get to the same performance compared to strategy alpha? 
(lower is better) # closest_index_beta = np.argmin(np.abs(curve_beta - performance_at_comparison_alpha)) # time_at_comparison_beta = time_range_beta[closest_index_beta] # outer takes X% of the time inner takes to reach the same performance (100%+percentage change) - percentage_change = (time_at_comparison_alpha - time_at_comparison_beta) / abs(time_at_comparison_beta) * 100 + percentage_change = ( + (time_at_comparison_alpha - time_at_comparison_beta) / abs(time_at_comparison_beta) * 100 + ) inner_comparison_data[strategy_index_beta] = 100 + percentage_change elif comparison_unit == "objective": # given the time at `compare_at_relative_time`, how much worse is the objective value of strategy beta at that moment compared to strategy alpha? (higher is better) @@ -1417,11 +1472,15 @@ def get_head2head_comparison_data_searchspace( # if not minimization: # percentage_change = -percentage_change - percentage_change_norm = (performance_at_comparison_beta_norm - performance_at_comparison_alpha_norm) / abs(performance_at_comparison_beta_norm) * 100 + percentage_change_norm = ( + (performance_at_comparison_beta_norm - performance_at_comparison_alpha_norm) + / abs(performance_at_comparison_beta_norm) + * 100 + ) inner_comparison_data[strategy_index_beta] = 100 + percentage_change_norm else: raise ValueError(f"Invalid comparison unit: {comparison_unit}. 
Expected 'time' or 'objective'.") - + comparison_data[strategy_index_alpha] = inner_comparison_data return comparison_data @@ -1671,7 +1730,10 @@ def plot_strategies_aggregated( # get the relevant plot settings cutoff_percentile: float = self.experiment["statistics_settings"].get("cutoff_percentile", 1) cutoff_percentile_start: float = self.experiment["statistics_settings"].get("cutoff_percentile_start", 0.01) - xlabel = plot_settings.get("xlabel", f"{self.x_metric_displayname['aggregate_time']} ({cutoff_percentile_start*100}% to {cutoff_percentile*100}%)") # noqa: E501 + xlabel = plot_settings.get( + "xlabel", + f"{self.x_metric_displayname['aggregate_time']} ({cutoff_percentile_start * 100}% to {cutoff_percentile * 100}%)", + ) # noqa: E501 ylabel = plot_settings.get("ylabel", self.y_metric_displayname["aggregate_objective"]) tmin = plot_settings.get("tmin", 1.0) @@ -1684,7 +1746,13 @@ def plot_strategies_aggregated( print("Quantification of aggregate performance across all search spaces:") # get the highest real_stopping_point_index, adjust y_axis_size and time_range if necessary - real_stopping_point_indices = [min(round(strategies_real_stopping_point_fraction[strategy_index] * time_range.shape[0]) + 1, time_range.shape[0]) for strategy_index in range(len(strategies_performance))] # noqa: E501 + real_stopping_point_indices = [ + min( + round(strategies_real_stopping_point_fraction[strategy_index] * time_range.shape[0]) + 1, + time_range.shape[0], + ) + for strategy_index in range(len(strategies_performance)) + ] # noqa: E501 real_stopping_point_index_max = max(real_stopping_point_indices) if tmin == "real": # stop the time at the largest real stopping point @@ -1742,9 +1810,9 @@ def plot_strategies_aggregated( and real_stopping_point_index < len(strategy_lower_err) - 1 ): ax.fill_between( - time_range[real_stopping_point_index-1:y_axis_size], - strategy_lower_err[real_stopping_point_index-1:y_axis_size], - 
strategy_upper_err[real_stopping_point_index-1:y_axis_size], + time_range[real_stopping_point_index - 1 : y_axis_size], + strategy_lower_err[real_stopping_point_index - 1 : y_axis_size], + strategy_upper_err[real_stopping_point_index - 1 : y_axis_size], alpha=0.15, antialiased=True, color=color, @@ -1763,8 +1831,8 @@ def plot_strategies_aggregated( and real_stopping_point_index < len(strategy_performance) - 1 ): ax.plot( - time_range[real_stopping_point_index-1:y_axis_size], - strategy_performance[real_stopping_point_index-1:y_axis_size], + time_range[real_stopping_point_index - 1 : y_axis_size], + strategy_performance[real_stopping_point_index - 1 : y_axis_size], color=color, ls="dashed", ) @@ -1788,7 +1856,7 @@ def plot_strategies_aggregated( # set the limits and legend ax.set_ylim(top=1.02) - ax.set_xlim((0, y_axis_size-1)) + ax.set_xlim((0, y_axis_size - 1)) ax.legend() return lowest_real_y_value From c36bf80e31be0f9fde092df505c40a7f13d1281e Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 3 Jul 2025 16:34:57 +0200 Subject: [PATCH 207/234] Further improvements based on linting --- src/autotuning_methodology/baseline.py | 12 +- src/autotuning_methodology/experiments.py | 1 + .../report_experiments.py | 2 + .../searchspace_statistics.py | 2 +- .../visualize_experiments.py | 165 +++++++++--------- .../integration/test_run_experiment.py | 2 +- 6 files changed, 95 insertions(+), 89 deletions(-) diff --git a/src/autotuning_methodology/baseline.py b/src/autotuning_methodology/baseline.py index 52a0f37..f59328b 100644 --- a/src/autotuning_methodology/baseline.py +++ b/src/autotuning_methodology/baseline.py @@ -184,9 +184,9 @@ def get_curve_over_fevals( # noqa: D102 return self._get_random_curve_means(fevals_range) return self._get_random_curve(fevals_range) - def get_curve_over_time( + def get_curve_over_time( # noqa: D102 self, time_range: np.ndarray, dist=None, confidence_level=None, return_split=False - ) -> np.ndarray: # noqa: D102 + ) -> np.ndarray: 
fevals_range = self.time_to_fevals(time_range) curve_over_time = self.get_curve_over_fevals(fevals_range, dist, confidence_level) smoothing_factor = 0.0 @@ -339,9 +339,9 @@ def get_curve_over_fevals( # noqa: D102 assert self.y_array.ndim == 1 return self.y_array[fevals_range] - def get_curve_over_time( + def get_curve_over_time( # noqa: D102 self, time_range: np.ndarray, dist=None, confidence_level=None, return_split=False - ) -> np.ndarray: # noqa: D102 + ) -> np.ndarray: predicted_y_values = self._ir.predict(time_range) if not self.use_index: return predicted_y_values @@ -431,9 +431,9 @@ def get_curve_over_fevals( # noqa: D102 ) return self.stochastic_curve_to_deterministic(range=fevals_range, curve=stochastic_curve) - def get_curve_over_time( + def get_curve_over_time( # noqa: D102 self, time_range: np.ndarray, dist=None, confidence_level=None, return_split=False - ) -> np.ndarray: # noqa: D102 + ) -> np.ndarray: if dist is None: dist = self.searchspace_stats.objective_performances_total_sorted if confidence_level is None: diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 6756a25..5faf34d 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -46,6 +46,7 @@ def get_args_from_cli(args=None) -> str: def make_and_check_path(filename: str, parent=None, extension=None) -> Path: + """Checks if the file exists, if not, tries to add the extension.""" filename_path = Path(filename) if filename_path.is_absolute() is False and parent is not None: filename_path = PACKAGE_ROOT / Path(parent).joinpath(filename).resolve() diff --git a/src/autotuning_methodology/report_experiments.py b/src/autotuning_methodology/report_experiments.py index 1f4514d..7c53bdf 100644 --- a/src/autotuning_methodology/report_experiments.py +++ b/src/autotuning_methodology/report_experiments.py @@ -19,6 +19,7 @@ def get_aggregation_data_key(gpu_name: str, application_name: str): Args: gpu_name: 
the GPU name + application_name: the application name Returns: The key as a string. @@ -43,6 +44,7 @@ def get_aggregation_data( Args: experiment_folderpath: _description_ experiment: _description_ + searchspace_statistics: _description_ strategies: _description_ results_descriptions: _description_ cutoff_percentile: _description_ diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index 4fa50b6..7bf9797 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -170,7 +170,7 @@ def __init__( minimization: whether the optimization algorithm was minimizing. objective_time_keys: the objective time keys used. objective_performance_keys: the objective performance keys used. - full_search_space_path: the path to the full search space file. + full_search_space_file_path: the path to the full search space file. """ self.loaded = False self.application_name = application_name diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index df97845..f185e46 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -10,8 +10,7 @@ import matplotlib.pyplot as plt import numpy as np from matplotlib.cm import get_cmap -from matplotlib.colors import to_rgb, to_hex, LinearSegmentedColormap -# from matplotlib.colors import LinearSegmentedColormap, rgb2hex +from matplotlib.colors import to_rgb, to_hex, rgb2hex, LinearSegmentedColormap from autotuning_methodology.baseline import ( Baseline, @@ -762,87 +761,87 @@ def norm_color_val(v): else: plt.show() elif style == "compare_heatmaps": - comparisons = plot["comparison"] raise NotImplementedError("Still a work in progress") - # set up the plot - fig, axs = plt.subplots( - ncols=1, figsize=(9, 6), dpi=300 - ) # if multiple subplots, pass the axis to the plot function with 
axs[0] etc. - if not hasattr(axs, "__len__"): - axs = [axs] - # title = f"Performance of {strategy_displayname} over {'+'.join(plot_x_value_types)},{'+'.join(plot_y_value_types)}" - # fig.canvas.manager.set_window_title(title) - # if not save_figs: - # fig.suptitle(title) - - for comparison in comparisons: - strategy_names = comparisons["strategies"] - strategies = [s for s in self.strategies if s["name"]] - # for strategy in strategies: - strategy_displayname = strategy["display_name"] - strategy_data = data_collected[strategy_name] - - # get the performance per selected type in an array - plot_data = np.stack(np.array([t[2] for t in strategy_data])) - cutoff_percentile: float = self.experiment["statistics_settings"].get("cutoff_percentile", 1) - cutoff_percentile_start: float = self.experiment["statistics_settings"].get( - "cutoff_percentile_start", 0.01 - ) - label_data = { - "gpus": ( - list(dict.fromkeys([t[0].replace(remove_from_gpus_label, "") for t in strategy_data])), - "GPUs", - ), - "applications": ( - list( - dict.fromkeys([t[1].replace(remove_from_applications_label, "") for t in strategy_data]) - ), - "Applications", - ), - "searchspaces": ( - list( - dict.fromkeys( - [ - f"{t[1]} on\n{t[0]}".replace(remove_from_searchspace_label, "") - for t in strategy_data - ] - ) - ), - "Searchspaces", - ), - "time": ( - np.round(np.linspace(0.0, 1.0, bins), 2), - f"Fraction of time between {cutoff_percentile_start * 100}% and {cutoff_percentile * 100}%", - ), - } - x_ticks = label_data[x_type][0] - y_ticks = label_data[y_type][0] - if (x_type == "time" and y_type == "searchspaces") or ( - x_type == "searchspaces" and y_type == "time" - ): - plot_data: np.ndarray = np.stack(np.array([t[3] for t in strategy_data])) - if x_type == "searchspaces": - plot_data = plot_data.transpose() - elif (x_type == "gpus" and y_type == "applications") or ( - y_type == "gpus" and x_type == "applications" - ): - plot_data = np.reshape( - plot_data, (len(label_data["gpus"][0]), 
len(label_data["applications"][0])) - ) - if x_type == "gpus": - plot_data = np.transpose(plot_data) - else: - raise NotImplementedError( - f"Heatmap has not yet been implemented for {x_type}, {y_type}. Submit an issue to request it." - ) - - # validate the data - outside_range = np.where(np.logical_or(plot_data < vmin, plot_data > vmax)) - assert len(outside_range[0]) == 0 and len(outside_range[1]) == 0, ( - f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range} for strategy {strategy_displayname})" - ) + # comparisons = plot["comparison"] + # # set up the plot + # fig, axs = plt.subplots( + # ncols=1, figsize=(9, 6), dpi=300 + # ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. + # if not hasattr(axs, "__len__"): + # axs = [axs] + # # title = f"Performance of {strategy_displayname} over {'+'.join(plot_x_value_types)},{'+'.join(plot_y_value_types)}" + # # fig.canvas.manager.set_window_title(title) + # # if not save_figs: + # # fig.suptitle(title) + + # for comparison in comparisons: + # strategy_names = comparisons["strategies"] + # strategies = [s for s in self.strategies if s["name"]] + # # for strategy in strategies: + # strategy_displayname = strategy["display_name"] + # strategy_data = data_collected[strategy_name] + + # # get the performance per selected type in an array + # plot_data = np.stack(np.array([t[2] for t in strategy_data])) + # cutoff_percentile: float = self.experiment["statistics_settings"].get("cutoff_percentile", 1) + # cutoff_percentile_start: float = self.experiment["statistics_settings"].get( + # "cutoff_percentile_start", 0.01 + # ) + # label_data = { + # "gpus": ( + # list(dict.fromkeys([t[0].replace(remove_from_gpus_label, "") for t in strategy_data])), + # "GPUs", + # ), + # "applications": ( + # list( + # dict.fromkeys([t[1].replace(remove_from_applications_label, "") for t in strategy_data]) + # ), + # "Applications", + # ), + # "searchspaces": ( + # list( + # 
dict.fromkeys( + # [ + # f"{t[1]} on\n{t[0]}".replace(remove_from_searchspace_label, "") + # for t in strategy_data + # ] + # ) + # ), + # "Searchspaces", + # ), + # "time": ( + # np.round(np.linspace(0.0, 1.0, bins), 2), + # f"Fraction of time between {cutoff_percentile_start * 100}% and {cutoff_percentile * 100}%", + # ), + # } + # x_ticks = label_data[x_type][0] + # y_ticks = label_data[y_type][0] + # if (x_type == "time" and y_type == "searchspaces") or ( + # x_type == "searchspaces" and y_type == "time" + # ): + # plot_data: np.ndarray = np.stack(np.array([t[3] for t in strategy_data])) + # if x_type == "searchspaces": + # plot_data = plot_data.transpose() + # elif (x_type == "gpus" and y_type == "applications") or ( + # y_type == "gpus" and x_type == "applications" + # ): + # plot_data = np.reshape( + # plot_data, (len(label_data["gpus"][0]), len(label_data["applications"][0])) + # ) + # if x_type == "gpus": + # plot_data = np.transpose(plot_data) + # else: + # raise NotImplementedError( + # f"Heatmap has not yet been implemented for {x_type}, {y_type}. Submit an issue to request it." 
+ # ) + + # # validate the data + # outside_range = np.where(np.logical_or(plot_data < vmin, plot_data > vmax)) + # assert len(outside_range[0]) == 0 and len(outside_range[1]) == 0, ( + # f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range} for strategy {strategy_displayname})" + # ) else: raise NotImplementedError(f"Invalid {style=}") @@ -1422,7 +1421,11 @@ def get_head2head_comparison_data_searchspace( absolute_optimum = searchspace_stats.total_performance_absolute_optimum() median = searchspace_stats.total_performance_median() - normalize = lambda val: (val - median) / (absolute_optimum - median) + def normalize(val): + """Min-max normalization of the performance value.""" + if absolute_optimum == median: + return 0.0 + return (val - median) / (absolute_optimum - median) performance_at_comparison_alpha_norm = normalize(performance_at_comparison_alpha) # compare against all other strategies diff --git a/tests/autotuning_methodology/integration/test_run_experiment.py b/tests/autotuning_methodology/integration/test_run_experiment.py index 3d8959b..9b29f2a 100644 --- a/tests/autotuning_methodology/integration/test_run_experiment.py +++ b/tests/autotuning_methodology/integration/test_run_experiment.py @@ -96,7 +96,7 @@ def test_CLI_input(): with pytest.raises(SystemExit) as e: dummy_args = ["-dummy_arg=option"] get_args_from_cli(dummy_args) - assert e.type == SystemExit + assert isinstance(e.type, SystemExit) assert e.value.code == 2 # improper input 2 From 3181010e88c2e5bae236d1a535c627f847a37920 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 3 Jul 2025 16:35:07 +0200 Subject: [PATCH 208/234] Further improvements based on linting --- experiment_files/convert_old_to_new_format.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/experiment_files/convert_old_to_new_format.py b/experiment_files/convert_old_to_new_format.py index f238ada..d2cf5f8 100644 --- a/experiment_files/convert_old_to_new_format.py +++ 
b/experiment_files/convert_old_to_new_format.py @@ -1,3 +1,5 @@ +"""Module to convert the old experiments file format into the new format.""" + # script to convert the old experiments file format into the new format import json from pathlib import Path From 36ad35ae3a7329f044ef703df0840cfbe74380bd Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 3 Jul 2025 16:35:37 +0200 Subject: [PATCH 209/234] Updated benchmark_hub submodule --- benchmark_hub | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark_hub b/benchmark_hub index 8eeb99e..ff76e2c 160000 --- a/benchmark_hub +++ b/benchmark_hub @@ -1 +1 @@ -Subproject commit 8eeb99e7179e6838af95d392ac6c60cebfd8c434 +Subproject commit ff76e2c86a7d9b3f389038589660e8b6ef4b4a5e From 1cc9cdbf9b281615ba59882e6b9ca7a15d7088e7 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 3 Jul 2025 16:36:22 +0200 Subject: [PATCH 210/234] Further specification of linting --- noxfile.py | 2 +- pyproject.toml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index bf5b957..dc6821a 100644 --- a/noxfile.py +++ b/noxfile.py @@ -16,7 +16,7 @@ def lint(session: nox.Session) -> None: """Ensure the code is formatted as expected.""" session.install("ruff") - session.run("ruff", "check", "--output-format=github", "--config=pyproject.toml", ".") + session.run("ruff", "check", "--output-format=github", "--config=pyproject.toml", "src") # @nox.session # uncomment this line to only run on the current python interpreter diff --git a/pyproject.toml b/pyproject.toml index 5af7e80..338351d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,5 +83,6 @@ select = [ "D", # pydocstyle, "NPY201", # Numpy 2.0 compatibility ] +ignore = ["E501"] [tool.ruff.lint.pydocstyle] convention = "google" From 348e91ccd90a644b55453039f8d154ed889261d5 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 3 Jul 2025 19:45:03 +0200 Subject: [PATCH 211/234] Minor improvements based on tests --- 
.../visualize_experiments.py | 38 ++++++++++--------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index f185e46..0ba73c7 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -4,13 +4,13 @@ import warnings from collections import defaultdict -from pathlib import Path from math import ceil +from pathlib import Path import matplotlib.pyplot as plt import numpy as np from matplotlib.cm import get_cmap -from matplotlib.colors import to_rgb, to_hex, rgb2hex, LinearSegmentedColormap +from matplotlib.colors import LinearSegmentedColormap, rgb2hex, to_hex, to_rgb from autotuning_methodology.baseline import ( Baseline, @@ -474,9 +474,9 @@ def norm_color_val(v): for strategy in self.strategies: strategy_name = strategy["name"] strategy_displayname = strategy["display_name"] - assert sum([1 for s in self.strategies if s["name"] == strategy_name]) == 1, ( - f"Strategy name '{strategy_name}' is not unqiue" - ) + assert ( + sum([1 for s in self.strategies if s["name"] == strategy_name]) == 1 + ), f"Strategy name '{strategy_name}' is not unqiue" # get the data from the collected aggregated data for gpu_name in self.experiment["experimental_groups_defaults"]["gpus"]: @@ -514,9 +514,9 @@ def norm_color_val(v): if combine else x_axis_range_real ) - assert np.array_equal(time_range, x_axis_range, equal_nan=True), ( - "time_range != x_axis_range" - ) + assert np.array_equal( + time_range, x_axis_range, equal_nan=True + ), "time_range != x_axis_range" curve = np.concatenate([curve_real, curve_fictional]) if combine else curve_real # get the standardised curves and write them to the collector curve: np.ndarray = random_baseline.get_standardised_curves( @@ -596,15 +596,17 @@ def norm_color_val(v): ) # validate the data is within the vmin-vmax range and visible colorbar range - assert not 
(plot_data > 1.0).any(), ( + assert not ( + plot_data > 1.0 + ).any(), ( "Plot data contains values greater than 1.0, which should not be possible. Please investigate." ) if cap_to_vmin: plot_data = np.clip(plot_data, vmin, 1.0) outside_range = np.where(np.logical_or(plot_data < vmin, plot_data > vmax)) - assert len(outside_range[0]) == 0 and len(outside_range[1]) == 0, ( - f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range} for strategy {strategy_displayname})" - ) + assert ( + len(outside_range[0]) == 0 and len(outside_range[1]) == 0 + ), f"There are values outside of the range ({vmin}, {vmax}): {plot_data[outside_range]} ({outside_range} for strategy {strategy_displayname})" outside_visible_range = np.where(np.logical_or(plot_data < cmin, plot_data > cmax)) if not (len(outside_visible_range[0]) == 0 and len(outside_visible_range[1]) == 0): warnings.warn( @@ -748,9 +750,9 @@ def norm_color_val(v): fig.tight_layout() if save_figs: suffix = "" - if include_colorbar: + if include_colorbar and not (x_type == "time" or y_type == "time"): suffix += "_colorbar" - if include_y_labels: + if include_y_labels and not (x_type == "time" or y_type == "time"): suffix += "_ylabels" filename_path = ( Path(self.plot_filename_prefix) @@ -1263,9 +1265,9 @@ def plot_split_times_bar_comparison( strategy_labels = list() for print_skip_key in print_skip: - assert print_skip_key in objective_time_keys, ( - f"Each key in print_skip must be in objective_time_keys, {print_skip_key} is not ({objective_time_keys})" - ) + assert ( + print_skip_key in objective_time_keys + ), f"Each key in print_skip must be in objective_time_keys, {print_skip_key} is not ({objective_time_keys})" # get a dictionary of {time_key: [array_average_time_per_strategy]} data_dict = dict.fromkeys(objective_time_keys) @@ -1421,11 +1423,13 @@ def get_head2head_comparison_data_searchspace( absolute_optimum = searchspace_stats.total_performance_absolute_optimum() median = 
searchspace_stats.total_performance_median() + def normalize(val): """Min-max normalization of the performance value.""" if absolute_optimum == median: return 0.0 return (val - median) / (absolute_optimum - median) + performance_at_comparison_alpha_norm = normalize(performance_at_comparison_alpha) # compare against all other strategies From a0774d4abdf3976fbd10461b2d295a48ac589a5c Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 3 Jul 2025 19:45:26 +0200 Subject: [PATCH 212/234] Adjusted tests to recent changes --- .../integration/test_run_experiment.py | 2 +- .../integration/test_visualization.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/autotuning_methodology/integration/test_run_experiment.py b/tests/autotuning_methodology/integration/test_run_experiment.py index 9b29f2a..8db7d78 100644 --- a/tests/autotuning_methodology/integration/test_run_experiment.py +++ b/tests/autotuning_methodology/integration/test_run_experiment.py @@ -96,7 +96,7 @@ def test_CLI_input(): with pytest.raises(SystemExit) as e: dummy_args = ["-dummy_arg=option"] get_args_from_cli(dummy_args) - assert isinstance(e.type, SystemExit) + assert e.type == SystemExit # noqa: E721 assert e.value.code == 2 # improper input 2 diff --git a/tests/autotuning_methodology/integration/test_visualization.py b/tests/autotuning_methodology/integration/test_visualization.py index d59cb42..214bba9 100644 --- a/tests/autotuning_methodology/integration/test_visualization.py +++ b/tests/autotuning_methodology/integration/test_visualization.py @@ -21,7 +21,7 @@ experiment_title = f"{kernel_id}_on_mock_GPU" plot_path_fevals = plot_path / f"{experiment_title}_fevals.png" plot_path_time = plot_path / f"{experiment_title}_time.png" -plot_path_heatmap = plot_path / "random_sample_10_iter_heatmap_applications_gpus.png" +plot_path_heatmap = plot_path / "random_sample_10_iter_heatmap_applications_gpus_colorbar.png" plot_path_heatmap_time = plot_path / 
"random_sample_10_iter_heatmap_time_searchspaces.png" plot_path_aggregated = plot_path / "aggregated.png" plot_path_split_times_fevals = plot_path / f"{experiment_title}_split_times_fevals.png" @@ -83,5 +83,8 @@ def test_visualize_experiment(): @pytest.mark.parametrize("plot_filepath", plot_filepaths) def test_visualized_plot(plot_filepath: Path): """Test whether valid plots have been produced.""" + assert plot_path.exists() for plot_filepath in plot_filepaths: - assert plot_filepath.exists(), f"{plot_filepath} does not exist, files in folder: {[f.name for f in plot_filepath.parent.iterdir() if f.is_file()]}" + assert ( + plot_filepath.exists() + ), f"{plot_filepath} does not exist, files in folder: {[f.name for f in plot_filepath.parent.iterdir() if f.is_file()]}" From 47e11b464583f1d4ac691434a0a7e79c8ff527c9 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 3 Jul 2025 20:15:56 +0200 Subject: [PATCH 213/234] Adjusted head2head plot generation to be reselient to NaNs --- src/autotuning_methodology/visualize_experiments.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 0ba73c7..59b945e 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -964,6 +964,8 @@ def norm_color_val(v): # cmin = np.nanmin(comparison_data_clipped) cmin = vmin # always show 0.0 as the start max_val = np.nanmax(comparison_data_clipped) + if np.isnan(max_val): + max_val = vmax # round to the nearest 100 cmax = round(ceil(max_val), -2) if cmax < max_val: @@ -1009,7 +1011,7 @@ def norm_color_val(v): ax.text( i, len(y_ticks) - 0.2, - f"{round(avg, 1) if avg < 100 else round(avg)}%", + f"{"NaN" if np.isnan(avg) else round(avg, 1) if avg < 100 else round(avg)}%", ha="center", va="center", color="black", From 82293114a8d35166dc6cd169eb2e65a8bb87adc4 Mon Sep 17 00:00:00 2001 From: 
Floris-Jan Willemsen Date: Thu, 3 Jul 2025 20:16:55 +0200 Subject: [PATCH 214/234] Added head2head plot to tests --- .../integration/mockfiles/test.json | 9 +++++++++ .../integration/test_visualization.py | 1 + 2 files changed, 10 insertions(+) diff --git a/tests/autotuning_methodology/integration/mockfiles/test.json b/tests/autotuning_methodology/integration/mockfiles/test.json index 5b48084..b477359 100644 --- a/tests/autotuning_methodology/integration/mockfiles/test.json +++ b/tests/autotuning_methodology/integration/mockfiles/test.json @@ -85,6 +85,15 @@ ], "bins": 10 }, + { + "scope": "aggregate", + "style": "head2head", + "comparison": { + "unit": "time", + "relative_time": 0.5 + }, + "annotate": true + }, { "scope": "aggregate", "style": "line", diff --git a/tests/autotuning_methodology/integration/test_visualization.py b/tests/autotuning_methodology/integration/test_visualization.py index 214bba9..d29aa36 100644 --- a/tests/autotuning_methodology/integration/test_visualization.py +++ b/tests/autotuning_methodology/integration/test_visualization.py @@ -23,6 +23,7 @@ plot_path_time = plot_path / f"{experiment_title}_time.png" plot_path_heatmap = plot_path / "random_sample_10_iter_heatmap_applications_gpus_colorbar.png" plot_path_heatmap_time = plot_path / "random_sample_10_iter_heatmap_time_searchspaces.png" +plot_path_aggregated_head2head = plot_path / "head2head_comparison_time.png" plot_path_aggregated = plot_path / "aggregated.png" plot_path_split_times_fevals = plot_path / f"{experiment_title}_split_times_fevals.png" plot_path_split_times_time = plot_path / f"{experiment_title}_split_times_time.png" From 90d471e796318070cee9ff816649a1aa3a6cb680 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 3 Jul 2025 21:39:58 +0200 Subject: [PATCH 215/234] Minor improvements to head2head plots --- src/autotuning_methodology/visualize_experiments.py | 2 +- tests/autotuning_methodology/integration/test_visualization.py | 1 + 2 files changed, 2 insertions(+), 1 
deletion(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 59b945e..30d007c 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -1011,7 +1011,7 @@ def norm_color_val(v): ax.text( i, len(y_ticks) - 0.2, - f"{"NaN" if np.isnan(avg) else round(avg, 1) if avg < 100 else round(avg)}%", + f"{'NaN' if np.isnan(avg) else round(avg, 1) if avg < 100 else round(avg)}%", ha="center", va="center", color="black", diff --git a/tests/autotuning_methodology/integration/test_visualization.py b/tests/autotuning_methodology/integration/test_visualization.py index d29aa36..1fc682e 100644 --- a/tests/autotuning_methodology/integration/test_visualization.py +++ b/tests/autotuning_methodology/integration/test_visualization.py @@ -35,6 +35,7 @@ plot_path_heatmap, plot_path_heatmap_time, plot_path_aggregated, + plot_path_aggregated_head2head, plot_path_split_times_fevals, plot_path_split_times_time, plot_path_split_times_bar, From f53955e12ba9a14df0160234bdcb018b838e7962 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 18 Jul 2025 15:49:06 +0200 Subject: [PATCH 216/234] Implemented quick and optional full validation of T4 files on loading, much improving performance --- src/autotuning_methodology/experiments.py | 3 ++- src/autotuning_methodology/formats_interface.py | 7 +++++++ src/autotuning_methodology/report_experiments.py | 5 +++-- .../searchspace_statistics.py | 13 +++++++++---- 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index 5faf34d..cc89b88 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -464,7 +464,7 @@ def generate_experiment_file( return experiment_file_path.resolve() -def execute_experiment(filepath: str, profiling: bool = False): +def execute_experiment(filepath: 
str, profiling: bool = False, full_validate_on_load: bool = True): """Executes the experiment by retrieving it from the cache or running it. Args: @@ -545,6 +545,7 @@ def execute_experiment(filepath: str, profiling: bool = False): objective_time_keys=objective_time_keys, objective_performance_keys=objective_performance_keys, full_search_space_file_path=full_search_space_file_path, + full_validate=full_validate_on_load, ) # calculation of budget can be done only now, after searchspace statistics have been initialized diff --git a/src/autotuning_methodology/formats_interface.py b/src/autotuning_methodology/formats_interface.py index 5214529..4d6fa58 100644 --- a/src/autotuning_methodology/formats_interface.py +++ b/src/autotuning_methodology/formats_interface.py @@ -20,9 +20,16 @@ def load_T4_format(filepath: Path, validate: True) -> dict: contents = orig_contents[:-2] + "}\n}" data = json.loads(contents) + # validate the data if validate: # validate it is in T4 format validate_T4(data) + else: + # if not validating, we still want to do a basic check of the format + assert isinstance(data, dict), "T4 format file should be a dictionary." + assert "metadata" in data, "T4 format file should contain metadata." + assert "schema_version" in data, "T4 format file should contain schema_version." + assert "results" in data, "T4 format file should contain results." 
# return the T4 data return data diff --git a/src/autotuning_methodology/report_experiments.py b/src/autotuning_methodology/report_experiments.py index 7c53bdf..a37b2aa 100644 --- a/src/autotuning_methodology/report_experiments.py +++ b/src/autotuning_methodology/report_experiments.py @@ -184,19 +184,20 @@ def get_strategies_aggregated_performance( ) -def get_strategy_scores(experiment_filepath: str, use_strategy_as_baseline=None): +def get_strategy_scores(experiment_filepath: str, use_strategy_as_baseline=None, full_validate_on_load=True): """Function to get performance scores per strategy by running the passed experiments file. Args: experiment_filepath: the path to the experiment-filename.json to run. use_strategy_as_baseline: whether to use an executed strategy as the baseline. Defaults to None. + full_validate_on_load: whether to fully validate the T4 format file. Defaults to True. Returns: a dictionary of the strategies, with the performance score and error for each strategy. """ # execute the experiment if necessary, else retrieve it experiment, strategies, searchspace_statistics, results_descriptions = execute_experiment( - experiment_filepath, profiling=False + experiment_filepath, profiling=False, full_validate_on_load=full_validate_on_load ) # get the settings diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index 7bf9797..ad16784 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -59,6 +59,10 @@ def is_not_invalid_value(value, performance: bool) -> bool: invalid_check_function = is_invalid_objective_performance if performance else is_invalid_objective_time return not invalid_check_function(value) +def filter_invalids(values: np.ndarray, performance: bool) -> np.ndarray: + """Filter out invalid values from the array.""" + return np.array([v for v in values if is_not_invalid_value(v, performance)]) + def 
to_valid_array( results: list[dict], @@ -112,7 +116,7 @@ def to_valid_array( if isinstance(value, (list, tuple, np.ndarray)): # if the value is an array, sum the valid values array = value - list_to_sum = list(v for v in array if is_not_invalid_value(v, performance)) + list_to_sum = list(v for v in array if is_not_invalid_value(v, performance)) # TODO optimize this try: values[value_index] = ( sum(list_to_sum) @@ -161,6 +165,7 @@ def __init__( objective_time_keys: list[str], objective_performance_keys: list[str], full_search_space_file_path: str, + full_validate: bool = True, ) -> None: """Initialization method for a Searchspace statistics object. @@ -181,7 +186,7 @@ def __init__( self.full_search_space_file_path = full_search_space_file_path # load the data into the arrays - self.loaded = self._load() + self.loaded = self._load(validate=full_validate) def T4_time_keys_to_kernel_tuner_time_keys(self, time_keys: list[str]) -> list[str]: """Temporary utility function to use the kernel tuner search space files with the T4 output format. 
@@ -408,11 +413,11 @@ def get_valid_filepath(self) -> Path: ) return filepath - def _load(self) -> bool: + def _load(self, validate=True) -> bool: """Load the contents of the full search space file.""" # if not, use a script to create a file with values from KTT output and formatting of KernelTuner filepath = self.get_valid_filepath() - data = load_T4_format(filepath, validate=True) + data = load_T4_format(filepath, validate=validate) metadata: dict = data.get("metadata", {}) timeunit = metadata.get("timeunit", "seconds") results: dict = data["results"] From 608154216590cbc5b22ab4ade9f2bc45a0ea756d Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sat, 19 Jul 2025 01:09:45 +0200 Subject: [PATCH 217/234] Improved to_valid_array performance --- .../searchspace_statistics.py | 24 ++++++++++++------- src/autotuning_methodology/validators.py | 2 +- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index ad16784..cf6602d 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -59,10 +59,18 @@ def is_not_invalid_value(value, performance: bool) -> bool: invalid_check_function = is_invalid_objective_performance if performance else is_invalid_objective_time return not invalid_check_function(value) -def filter_invalids(values: np.ndarray, performance: bool) -> np.ndarray: - """Filter out invalid values from the array.""" - return np.array([v for v in values if is_not_invalid_value(v, performance)]) - +def filter_invalids(values, performance: bool) -> list: + """Filter out invalid values from the array. + + Assumes that `values` is a list or array of values. + If changes are made here, also change `is_invalid_objective_time`. 
+ """ + if performance or any([isinstance(v, (str, list, tuple, np.ndarray)) for v in values]): + # if there are any non-numeric values, fall back to a list comprehension + return list([v for v in values if is_not_invalid_value(v, performance)]) + # invalid time values can be checked for the entire array at once, much faster than iterating + array = np.array(values) + return array[(~np.isnan(array)) & (array >= 0.0)].tolist() def to_valid_array( results: list[dict], @@ -115,12 +123,12 @@ def to_valid_array( for value_index, value in enumerate(values): if isinstance(value, (list, tuple, np.ndarray)): # if the value is an array, sum the valid values - array = value - list_to_sum = list(v for v in array if is_not_invalid_value(v, performance)) # TODO optimize this + list_to_sum = filter_invalids(value, performance) try: + sum_of_list = sum(list_to_sum) values[value_index] = ( - sum(list_to_sum) - if len(list_to_sum) > 0 and is_not_invalid_value(sum(list_to_sum), performance) + sum_of_list + if len(list_to_sum) > 0 and is_not_invalid_value(sum_of_list, performance) else np.nan ) except TypeError as e: diff --git a/src/autotuning_methodology/validators.py b/src/autotuning_methodology/validators.py index d6d4176..7c4469d 100644 --- a/src/autotuning_methodology/validators.py +++ b/src/autotuning_methodology/validators.py @@ -76,7 +76,7 @@ def is_invalid_objective_performance(objective_performance: float) -> bool: def is_invalid_objective_time(objective_time: float) -> bool: - """Returns whether an objective time is invalid. + """Returns whether an objective time is invalid. If changes are made here, also change `filter_invalids`. Args: objective_time: the objective time value to check. 
From c8a1f04b22301e606782bc67a4055f932e312f30 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sat, 19 Jul 2025 01:10:37 +0200 Subject: [PATCH 218/234] Updated benchmarkhub --- benchmark_hub | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark_hub b/benchmark_hub index ff76e2c..ca104e9 160000 --- a/benchmark_hub +++ b/benchmark_hub @@ -1 +1 @@ -Subproject commit ff76e2c86a7d9b3f389038589660e8b6ef4b4a5e +Subproject commit ca104e907d22c7a78067c234fb933da731d18a90 From 9312c4c0b33e8960403f56557f7c8d36a940e585 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 25 Jul 2025 18:52:46 +0200 Subject: [PATCH 219/234] Improvements to docstrings --- src/autotuning_methodology/experiments.py | 1 + src/autotuning_methodology/searchspace_statistics.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/autotuning_methodology/experiments.py b/src/autotuning_methodology/experiments.py index cc89b88..0c64184 100755 --- a/src/autotuning_methodology/experiments.py +++ b/src/autotuning_methodology/experiments.py @@ -470,6 +470,7 @@ def execute_experiment(filepath: str, profiling: bool = False, full_validate_on_ Args: filepath: path to the experiments .json file. profiling: whether profiling is enabled. Defaults to False. + full_validate_on_load: whether to fully validate the searchspace statistics file on load. Defaults to True. Raises: FileNotFoundError: if the path to the kernel specified in the experiments file is not found. diff --git a/src/autotuning_methodology/searchspace_statistics.py b/src/autotuning_methodology/searchspace_statistics.py index cf6602d..43d46c7 100644 --- a/src/autotuning_methodology/searchspace_statistics.py +++ b/src/autotuning_methodology/searchspace_statistics.py @@ -60,9 +60,9 @@ def is_not_invalid_value(value, performance: bool) -> bool: return not invalid_check_function(value) def filter_invalids(values, performance: bool) -> list: - """Filter out invalid values from the array. 
+ """Filter out invalid values from the array. - Assumes that `values` is a list or array of values. + Assumes that `values` is a list or array of values. If changes are made here, also change `is_invalid_objective_time`. """ if performance or any([isinstance(v, (str, list, tuple, np.ndarray)) for v in values]): @@ -184,6 +184,7 @@ def __init__( objective_time_keys: the objective time keys used. objective_performance_keys: the objective performance keys used. full_search_space_file_path: the path to the full search space file. + full_validate: whether to fully validate the searchspace statistics file on load. Defaults to True. """ self.loaded = False self.application_name = application_name From f78802be9dfda264a2f799e13631b0647e6cc4c2 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 28 Jul 2025 14:53:56 +0200 Subject: [PATCH 220/234] Remove the second color (orange) to avoid confusion with the fourth (red) --- src/autotuning_methodology/visualize_experiments.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 30d007c..0a6adfd 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -47,6 +47,7 @@ def lighten_color(color, amount: float = 0.5): def get_colors(strategies: list[dict]) -> list: """Assign colors using the tab10 colormap, with lighter shades for children.""" tab10 = plt.get_cmap("tab10").colors + tab10 = [c for i, c in enumerate(tab10) if i != 1] # remove the second color (orange) to avoid confusion with the fourth (red) max_parents = len(tab10) strategy_parents = defaultdict(list) @@ -244,7 +245,7 @@ def __init__( compare_split_times: bool = self.experiment["visualization_settings"]["compare_split_times"] confidence_level: float = self.experiment["visualization_settings"]["confidence_level"] self.colors = get_colors(self.strategies) - # self.colors = 
get_colors( + # self.colors = get_colors_old( # self.strategies, # scale_margin_left=self.experiment["visualization_settings"].get("color_parent_scale_margin_left", 0.4), # scale_margin_right=self.experiment["visualization_settings"].get("color_parent_scale_margin_right", 0.1), From 161f093c54b31ffa9b4f1f8b42739b553aa3da00 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 7 Aug 2025 09:03:28 +0200 Subject: [PATCH 221/234] Implemented color index override for parent strategies --- .../schemas/experiments.json | 9 +++++++++ .../visualize_experiments.py | 18 ++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/autotuning_methodology/schemas/experiments.json b/src/autotuning_methodology/schemas/experiments.json index c934f98..0efd222 100755 --- a/src/autotuning_methodology/schemas/experiments.json +++ b/src/autotuning_methodology/schemas/experiments.json @@ -211,6 +211,15 @@ "description": "Name for the search strategy used in visualizations", "type": "string" }, + "color_parent": { + "description": "Name of the parent search strategy to use a shade of its color for this search strategy.", + "type": "string" + }, + "color_index": { + "description": "Override of the index of the color in the matplotlib colormap to use for this search strategy. Only use on strategy parents. If set, all strategy parents must have it.", + "type": "integer", + "minimum": 0 + }, "stochastic": { "description": "Whether the repeated runs of the same experimental group (combination of application, GPU and search strategy) exhibit stochastic behaviour, e.g. 
due to stochastic search strategy", "type": "boolean", diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 0a6adfd..23c1c19 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -50,11 +50,18 @@ def get_colors(strategies: list[dict]) -> list: tab10 = [c for i, c in enumerate(tab10) if i != 1] # remove the second color (orange) to avoid confusion with the fourth (red) max_parents = len(tab10) strategy_parents = defaultdict(list) + override_index = False - # Group children under their parents + # Group children under their parents and check for overridden color indices for i, strategy in enumerate(strategies): if "color_parent" in strategy: strategy_parents[strategy["color_parent"]].append(i) + if "color_index" in strategy: + override_index = True + if "color_parent" in strategy: + raise ValueError( + f"Strategy '{strategy['name']}' has both 'color_index' and 'color_parent' defined, which is not allowed." + ) if len(strategy_parents) > max_parents: raise ValueError(f"Too many color parents: max supported is {max_parents} using tab10") @@ -69,6 +76,9 @@ def get_colors(strategies: list[dict]) -> list: children_indices = strategy_parents[name] if len(children_indices) > 2: raise ValueError(f"Color parent '{name}' has more than two children") + if override_index: + assert "color_index" in strategy, f"All strategies, including '{name}', must have either 'color_index' or 'color_parent' if 'color_index' is used anywhere." 
+ color_index = strategy["color_index"] base_color = tab10[color_index] parent_colors[name] = { idx: lighten_color(base_color, amount=0.4 + 0.3 * j) for j, idx in enumerate(children_indices) @@ -77,6 +87,7 @@ def get_colors(strategies: list[dict]) -> list: color_index += 1 elif "color_parent" in strategy: parent = strategy["color_parent"] + assert parent in parent_colors, f"Parent '{parent}' for strategy '{name}' not found in parent colors - child strategies must be defined after their parents." colors[i] = parent_colors[parent][i] else: if color_index >= len(tab10): @@ -1033,8 +1044,11 @@ def norm_color_val(v): # plot the aggregation if style == "line" and (continue_after_comparison or not (compare_baselines or compare_split_times)): + # fig, axs = plt.subplots( + # ncols=1, figsize=(6.8, 4.0), dpi=300 + # ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. fig, axs = plt.subplots( - ncols=1, figsize=(6.8, 4.0), dpi=300 + ncols=1, figsize=(8.5, 5.0), dpi=300 ) # if multiple subplots, pass the axis to the plot function with axs[0] etc. 
if not hasattr(axs, "__len__"): axs = [axs] From b9aac41e88cc8ef0f084b17d8ebfc5761d7ac14b Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 7 Aug 2025 09:03:58 +0200 Subject: [PATCH 222/234] Implemented extensive comparison for hyperparameter tuning paper --- .../compare_hypertuners_paper_extensive.json | 361 ++++++++++++++++++ 1 file changed, 361 insertions(+) create mode 100644 experiment_files/compare_hypertuners_paper_extensive.json diff --git a/experiment_files/compare_hypertuners_paper_extensive.json b/experiment_files/compare_hypertuners_paper_extensive.json new file mode 100644 index 0000000..1fd39ee --- /dev/null +++ b/experiment_files/compare_hypertuners_paper_extensive.json @@ -0,0 +1,361 @@ +{ + "version": "1.2.0", + "name": "Compare hyperparameter tuning extensive", + "parent_folder": "/var/scratch/fjwillem/hyperparametertuning_milo", + "experimental_groups_defaults": { + "applications": [ + { + "name": "dedispersion_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "dedispersion_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "hotspot_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "hotspot_milo.json", + "objective_performance_keys": [ + "GFLOP/s" + ] + }, + { + "name": "convolution_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "convolution_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "gemm_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "gemm_milo.json", + "objective_performance_keys": [ + "time" + ] + } + ], + "gpus": [ + "A100", + "A4000", + "MI250X", + "A6000", + "W6600", + "W7800" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 100, + "samples": 32, + "minimum_fraction_of_budget_valid": 0.1, + 
"minimum_number_of_valid_search_iterations": 10, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "genetic_algorithm_tuned_extensive_6", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 26 + }, + { + "name": "maxiter", + "value": 150 + }, + { + "name": "method", + "value": "single_point" + }, + { + "name": "mutation_chance", + "value": 5 + } + ], + "display_name": "Genetic Algorithm extensive", + "autotuner": "KernelTuner", + "color_index": 1 + }, + { + "name": "genetic_algorithm_tuned", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 20 + }, + { + "name": "maxiter", + "value": 150 + }, + { + "name": "method", + "value": "single_point" + }, + { + "name": "mutation_chance", + "value": 5 + } + ], + "display_name": "Genetic Algorithm limited", + "autotuner": "KernelTuner", + "color_parent": "genetic_algorithm_tuned_extensive_6" + }, + { + "name": "genetic_algorithm_mean", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 20 + }, + { + "name": "maxiter", + "value": 50 + }, + { + "name": "method", + "value": "disruptive_uniform" + }, + { + "name": "mutation_chance", + "value": 20 + } + ], + "display_name": "Genetic Algorithm mean", + "autotuner": "KernelTuner", + "color_parent": "genetic_algorithm_tuned_extensive_6" + }, + { + "name": "pso_tuned_extensive", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 50 + }, + { + "name": "maxiter", + "value": 190 + }, + { + "name": "c1", + "value": 3.5 + }, + { + "name": "c2", + "value": 1.0 + } + ], + "display_name": "PSO extensive", + "autotuner": "KernelTuner", + "color_index": 2 + }, + { + "name": "pso_tuned", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 30 + }, + { + "name": "maxiter", + "value": 100 + }, + { + 
"name": "c1", + "value": 3.0 + }, + { + "name": "c2", + "value": 0.5 + } + ], + "display_name": "PSO limited", + "autotuner": "KernelTuner", + "color_parent": "pso_tuned_extensive" + }, + { + "name": "pso_mean", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 20 + }, + { + "name": "maxiter", + "value": 50 + }, + { + "name": "c1", + "value": 1.0 + }, + { + "name": "c2", + "value": 1.0 + } + ], + "display_name": "PSO mean", + "autotuner": "KernelTuner", + "color_parent": "pso_tuned_extensive" + }, + { + "name": "simulated_annealing_tuned_extensive", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "T", + "value": 0.1 + }, + { + "name": "T_min", + "value": 0.0001 + }, + { + "name": "alpha", + "value": 0.9975 + }, + { + "name": "maxiter", + "value": 1 + } + ], + "display_name": "Simulated Annealing extensive", + "autotuner": "KernelTuner", + "color_index": 3 + }, + { + "name": "simulated_annealing_tuned", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "T", + "value": 0.5 + }, + { + "name": "T_min", + "value": 0.001 + }, + { + "name": "alpha", + "value": 0.9975 + }, + { + "name": "maxiter", + "value": 1 + } + ], + "display_name": "Simulated Annealing limited", + "autotuner": "KernelTuner", + "color_parent": "simulated_annealing_tuned_extensive" + }, + { + "name": "simulated_annealing_mean", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "T", + "value": 1.0 + }, + { + "name": "T_min", + "value": 0.001 + }, + { + "name": "alpha", + "value": 0.995 + }, + { + "name": "maxiter", + "value": 2 + } + ], + "display_name": "Simulated Annealing mean", + "autotuner": "KernelTuner", + "color_parent": "simulated_annealing_tuned_extensive" + } + ], + "statistics_settings": { + "cutoff_percentile": 0.95, + "cutoff_percentile_start": 0.01, + "cutoff_type": "time", + "objective_time_keys": [ + "all" + ] 
+ }, + "visualization_settings": { + "plots": [ + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ], + "cmin": -6.0, + "include_y_labels": true, + "include_colorbar": false, + "divide_train_test_axis": "gpus", + "divide_train_test_after_num": 3, + "annotate": false + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ], + "cmin": -6.0, + "cnum": 8, + "include_y_labels": false, + "include_colorbar": true, + "divide_train_test_axis": "gpus", + "divide_train_test_after_num": 3, + "annotate": false + }, + { + "scope": "aggregate", + "style": "line", + "ylabel": "Aggregate performance relative to baseline" + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file From 0feecd2ca616b733aaf562ce45f5cf99f7acef1e Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Sat, 9 Aug 2025 15:04:35 +0200 Subject: [PATCH 223/234] Automatic compression and memory pressure reduction in results collection --- src/autotuning_methodology/curves.py | 7 ++- src/autotuning_methodology/runner.py | 74 +++++++++++++++++++++++++--- 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/src/autotuning_methodology/curves.py b/src/autotuning_methodology/curves.py index 6094d3e..d05d0ab 100644 --- a/src/autotuning_methodology/curves.py +++ b/src/autotuning_methodology/curves.py @@ -33,6 +33,11 @@ def get_indices_in_distribution( A NumPy array of type float of the same shape as `draws`, with NaN where not found in 
`dist`. """ assert dist.ndim == 1, f"distribution can not have more than one dimension, has {dist.ndim}" + if draws.dtype != dist.dtype: + warn( + f"Draws dtype {draws.dtype} does not match distribution dtype {dist.dtype}, converting dist to draws dtype", + ) + dist = dist.astype(draws.dtype) # check whether the distribution is correctly ordered if not skip_dist_check: @@ -52,7 +57,7 @@ def get_indices_in_distribution( assert sorter.shape == dist.shape, "The shape of the sorter must be the same as the distribution" # find the index of each draw in the distribution - indices_found = np.searchsorted(dist, draws, side="left", sorter=sorter).astype(float) + indices_found = np.searchsorted(dist, draws, side="left", sorter=sorter).astype(draws.dtype) assert indices_found.shape == draws.shape, "The shape of the indices must match the shape of the draws" # if indices found are outside the array, make them NaN diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index b560818..7cecd2e 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -8,6 +8,7 @@ import time as python_time import warnings from pathlib import Path +import pickle, gzip # compression libraries if necessary for collecting results import numpy as np import progressbar @@ -216,6 +217,7 @@ def collect_results( results_description: ResultsDescription, searchspace_stats: SearchspaceStatistics, profiling: bool, + compress: bool = True, ) -> ResultsDescription: """Executes optimization algorithms on tuning problems to capture their behaviour. @@ -229,6 +231,11 @@ def collect_results( Returns: The ``ResultsDescription`` object with the results. 
""" + if profiling: + import psutil, os + process = psutil.Process(os.getpid()) + warnings.warn(f"Memory usage at start of collect_results: {process.memory_info().rss / 1e6:.1f} MB") + # calculate the minimum number of function evaluations that must be valid minimum_fraction_of_budget_valid = group.get("minimum_fraction_of_budget_valid", None) if minimum_fraction_of_budget_valid is not None: @@ -342,10 +349,19 @@ def cumulative_time_taken(results: list) -> list: temp_res_filtered = list(filter(lambda config: is_valid_config_result(config), temp_results)) only_invalid = len(temp_res_filtered) < 2 # there must be at least two valid configurations attempt += 1 + + # compress the results if necessary + if compress: + results = gzip.compress(pickle.dumps(results)) + # register the results repeated_results.append(results) total_time_results = np.append(total_time_results, total_time_ms) + # report the memory usage + if profiling: + warnings.warn(f"Memory usage after iteration {rep}: {process.memory_info().rss / 1e6:.1f} MB") + # gather profiling data and clear the profiler before the next round if profiling: stats = yappi.get_func_stats() @@ -353,30 +369,69 @@ def cumulative_time_taken(results: list) -> list: path = results_description.run_folder + "/profile-v2.prof" stats.save(path, type="pstat") # pylint: disable=no-member yappi.clear_stats() + warnings.warn(f"Memory usage before writing in collect_results: {process.memory_info().rss / 1e6:.1f} MB") # combine the results to numpy arrays and write to a file - write_results(repeated_results, results_description) + write_results(repeated_results, results_description, compressed=compress) + if profiling: + warnings.warn(f"Memory usage at end of of collect_results: {process.memory_info().rss / 1e6:.1f} MB") assert results_description.has_results(), "No results in ResultsDescription after writing results." 
return results_description -def write_results(repeated_results: list, results_description: ResultsDescription): +def write_results(repeated_results: list, results_description: ResultsDescription, compressed=False): """Combine the results and write them to a NumPy file. Args: repeated_results: a list of tuning results, one per tuning session. results_description: the ``ResultsDescription`` object to write the results to. + compressed: whether the repeated_results are compressed. """ # get the objective value and time keys objective_time_keys = results_description.objective_time_keys objective_performance_keys = results_description.objective_performance_keys - # find the maximum number of function evaluations - max_num_evals = max(len(repeat) for repeat in repeated_results) + # find the maximum (reasonable) number of function evaluations + num_evals = [] + for repeat in repeated_results: + if compressed: + repeat = pickle.loads(gzip.decompress(repeat)) + num_evals.append(len(repeat)) + max_num_evals = max(num_evals) if num_evals else 0 + mean_num_evals = np.mean(num_evals) if num_evals else 0 + if max_num_evals > mean_num_evals * 2: + # the maximum number of evaluations is more than twice the mean, this is likely an outlier, cut to save memory + max_num_evals = int(mean_num_evals * 2) + if max_num_evals > 1e8: + # more than 100 million evaluations, set to the mean number of evaluations + max_num_evals = int(mean_num_evals) + + # set the dtype + dtype = np.float64 + if max_num_evals * len(repeated_results) > 1e9: + warnings.warn( + f"More than 1 billion entries ({max_num_evals * len(repeated_results)}) in the results, using float16 to save memory." + ) + dtype = np.float16 + elif max_num_evals * len(repeated_results) > 1e8: + warnings.warn( + f"More than 100 million entries ({max_num_evals * len(repeated_results)}) in the results, using float32 to save memory." 
+ ) + dtype = np.float32 + estimated_memory_usage = max_num_evals * len(repeated_results) * ( + 8 if dtype == np.float64 else 2 if dtype == np.float16 else 4 + ) # 8 bytes for float64, 4 bytes for float32, 2 bytes for float16 + if estimated_memory_usage > 1e9*10: # more than 10 GB + warnings.warn( + f"Estimated memory usage of {estimated_memory_usage / 1e9:.2f} GB for the results arrays, may go out of memory." + ) def get_nan_array() -> np.ndarray: """Get an array of NaN so they are not counted as zeros inadvertedly.""" - return np.full((max_num_evals, len(repeated_results)), np.nan) + # return np.full((max_num_evals, len(repeated_results)), np.nan, dtype=dtype) + arr = np.empty((max_num_evals, len(repeated_results)), dtype=dtype) + arr.fill(np.nan) + return arr # set the arrays to write to fevals_results = get_nan_array() @@ -384,17 +439,22 @@ def get_nan_array() -> np.ndarray: objective_performance_results = get_nan_array() objective_performance_best_results = get_nan_array() objective_performance_stds = get_nan_array() - objective_time_results_per_key = np.full((len(objective_time_keys), max_num_evals, len(repeated_results)), np.nan) + objective_time_results_per_key = np.full((len(objective_time_keys), max_num_evals, len(repeated_results)), np.nan, dtype=dtype) objective_performance_results_per_key = np.full( - (len(objective_time_keys), max_num_evals, len(repeated_results)), np.nan + (len(objective_time_keys), max_num_evals, len(repeated_results)), np.nan, dtype=dtype ) # combine the results opt_func = np.nanmin if results_description.minimization is True else np.nanmax for repeat_index, repeat in enumerate(repeated_results): + if compressed: + repeat = pickle.loads(gzip.decompress(repeat)) cumulative_objective_time = 0 objective_performance_best = np.nan for evaluation_index, evaluation in enumerate(repeat): + if evaluation_index >= max_num_evals: + break + # set the number of function evaluations fevals_results[evaluation_index, repeat_index] = ( 
evaluation_index + 1 From c81fd74fd939a3cfeeefa42ed811bd85b9343118 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 19 Aug 2025 18:00:58 +0200 Subject: [PATCH 224/234] Implemented optional printing means of columns and rows of heatmaps --- .../schemas/experiments.json | 24 +++++++++++++++++++ .../visualize_experiments.py | 10 ++++++++ 2 files changed, 34 insertions(+) diff --git a/src/autotuning_methodology/schemas/experiments.json b/src/autotuning_methodology/schemas/experiments.json index 0efd222..81bb87d 100755 --- a/src/autotuning_methodology/schemas/experiments.json +++ b/src/autotuning_methodology/schemas/experiments.json @@ -461,6 +461,30 @@ "y_label": { "description": "Override the default generated label for the y-axis.", "type": "string" + }, + "print_mean_of_columns": { + "description": "Print the mean of the columns in the heatmap to the console.", + "type": "boolean", + "default": false + }, + "print_mean_of_rows": { + "description": "Print the mean of the rows in the heatmap to the console.", + "type": "boolean", + "default": false + }, + "divide_train_test_axis": { + "description": "Axis to divide the train and test data on in the heatmap.", + "type": "string", + "enum": [ + "gpus", + "applications" + ], + "default": "gpus" + }, + "divide_train_test_after_num": { + "description": "Number of items in the divide_train_test_axis after which to divide the train and test data.", + "type": "integer", + "default": 3 } } } diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index 23c1c19..b8cf393 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -429,6 +429,8 @@ def __init__( plot_x_value_types: list[str] = plot["x_axis_value_types"] plot_y_value_types: list[str] = plot["y_axis_value_types"] annotate: bool = plot.get("annotate", True) + print_mean_of_columns: bool = plot.get("print_mean_of_columns", False) + 
print_mean_of_rows: bool = plot.get("print_mean_of_rows", False) assert len(plot_x_value_types) == 1 assert len(plot_y_value_types) == 1 x_type = plot_x_value_types[0] @@ -757,6 +759,14 @@ def norm_color_val(v): color="white" if (number > 0.5 or number < -2) else "black", fontsize="small", ) + + # print extra information if requested + if print_mean_of_columns: + mean_of_columns = np.nanmean(plot_data, axis=0) + print(f"Mean of columns for {strategy_displayname} ({x_type}): {mean_of_columns}") + if print_mean_of_rows: + mean_of_rows = np.nanmean(plot_data, axis=1) + print(f"Mean of rows for {strategy_displayname} ({y_type}): {mean_of_rows}") # finalize the figure and save or display it fig.tight_layout() From 43067cfee8f1b90c2f53924ed859b489141deaf4 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 19 Aug 2025 18:17:31 +0200 Subject: [PATCH 225/234] Experiment files for the upcoming LLaMEA paper with Niki --- experiment_files/niki_compare.json | 201 +++++++++++++++++++++++++ experiment_files/niki_compare_kt.json | 207 ++++++++++++++++++++++++++ experiment_files/nikki_compare.json | 122 --------------- 3 files changed, 408 insertions(+), 122 deletions(-) create mode 100644 experiment_files/niki_compare.json create mode 100644 experiment_files/niki_compare_kt.json delete mode 100644 experiment_files/nikki_compare.json diff --git a/experiment_files/niki_compare.json b/experiment_files/niki_compare.json new file mode 100644 index 0000000..3a14948 --- /dev/null +++ b/experiment_files/niki_compare.json @@ -0,0 +1,201 @@ +{ + "version": "1.2.0", + "name": "Compare Niki's LLMAEA algorithms", + "parent_folder": "/var/scratch/fjwillem/compare_niki", + "experimental_groups_defaults": { + "applications": [ + { + "name": "dedispersion_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "dedispersion_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "hotspot_milo", + "folder": 
"../autotuning_methodology/benchmark_hub/kernels", + "input_file": "hotspot_milo.json", + "objective_performance_keys": [ + "GFLOP/s" + ] + }, + { + "name": "convolution_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "convolution_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "gemm_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "gemm_milo.json", + "objective_performance_keys": [ + "time" + ] + } + ], + "gpus": [ + "A100", + "A4000", + "MI250X", + "A6000", + "W6600", + "W7800" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 100, + "samples": 32, + "minimum_fraction_of_budget_valid": 0.1, + "minimum_number_of_valid_search_iterations": 10, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "llamea_alg-convolution-LLaMEA-o4-mini", + "search_method": "PRTS", + "display_name": "LLaMEA o4 mini convolution", + "autotuner": "KernelTuner", + "custom_search_method_path": "../llamea_gen_algs/alg-convolution-LLaMEA-o4-mini.py" + }, + { + "name": "llamea_alg-convolution-no-info-LLaMEA-o4-mini", + "search_method": "TabuHarmonySearch", + "display_name": "LLaMEA o4 mini convolution no info", + "autotuner": "KernelTuner", + "custom_search_method_path": "../llamea_gen_algs/alg-convolution-no-info-LLaMEA-o4-mini.py", + "color_parent": "llamea_alg-convolution-LLaMEA-o4-mini" + }, + { + "name": "llamea_alg-dedispersion-LLaMEA-o4-mini", + "search_method": "HybridVNDX", + "display_name": "LLaMEA o4 mini dedispersion", + "autotuner": "KernelTuner", + "custom_search_method_path": "../llamea_gen_algs/alg-dedispersion-LLaMEA-o4-mini.py" + }, + { + "name": "llamea_alg-dedispersion-no-info-LLaMEA-o4-mini", + "search_method": "AdaptiveBanditNeighborhoodSearch", + "display_name": "LLaMEA o4 mini dedispersion no info", + "autotuner": "KernelTuner", + 
"custom_search_method_path": "../llamea_gen_algs/alg-dedispersion-no-info-LLaMEA-o4-mini.py", + "color_parent": "llamea_alg-dedispersion-LLaMEA-o4-mini" + }, + { + "name": "llamea_alg-gemm-LLaMEA-o4-mini", + "search_method": "AdaptiveTabuGreyWolf", + "display_name": "LLaMEA o4 mini gemm", + "autotuner": "KernelTuner", + "custom_search_method_path": "../llamea_gen_algs/alg-gemm-LLaMEA-o4-mini.py" + }, + { + "name": "llamea_alg-gemm-no-info-LLaMEA-o4-mini", + "search_method": "HierarchicalBanditVNS", + "display_name": "LLaMEA o4 mini gemm no info", + "autotuner": "KernelTuner", + "custom_search_method_path": "../llamea_gen_algs/alg-gemm-no-info-LLaMEA-o4-mini.py", + "color_parent": "llamea_alg-gemm-LLaMEA-o4-mini" + }, + { + "name": "llamea_alg-hotspot-LLaMEA-o4-mini", + "search_method": "ThompsonVNS", + "display_name": "LLaMEA o4 mini hotspot", + "autotuner": "KernelTuner", + "custom_search_method_path": "../llamea_gen_algs/alg-hotspot-LLaMEA-o4-mini.py" + }, + { + "name": "llamea_alg-hotspot-no-info-LLaMEA-o4-mini", + "search_method": "AdaptiveLevySA", + "display_name": "LLaMEA o4 mini hotspot no info", + "autotuner": "KernelTuner", + "custom_search_method_path": "../llamea_gen_algs/alg-hotspot-no-info-LLaMEA-o4-mini.py", + "color_parent": "llamea_alg-hotspot-LLaMEA-o4-mini" + } + ], + "statistics_settings": { + "cutoff_percentile": 0.95, + "cutoff_percentile_start": 0.01, + "cutoff_type": "time", + "objective_time_keys": [ + "all" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ], + "cmin": -8.0, + "cnum": 10, + 
"include_y_labels": true, + "include_colorbar": false, + "divide_train_test_axis": "gpus", + "divide_train_test_after_num": 3, + "annotate": true, + "print_mean_of_columns": true, + "print_mean_of_rows": false + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ], + "cmin": -8.0, + "cnum": 10, + "include_y_labels": false, + "include_colorbar": true, + "divide_train_test_axis": "gpus", + "divide_train_test_after_num": 3, + "annotate": true, + "print_mean_of_columns": true, + "print_mean_of_rows": false + }, + { + "scope": "aggregate", + "style": "line", + "ylabel": "Aggregate performance relative to baseline" + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file diff --git a/experiment_files/niki_compare_kt.json b/experiment_files/niki_compare_kt.json new file mode 100644 index 0000000..a1afa9c --- /dev/null +++ b/experiment_files/niki_compare_kt.json @@ -0,0 +1,207 @@ +{ + "version": "1.2.0", + "name": "Compare Niki's LLMAEA algorithms", + "parent_folder": "/var/scratch/fjwillem/compare_niki", + "experimental_groups_defaults": { + "applications": [ + { + "name": "dedispersion_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "dedispersion_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "hotspot_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "hotspot_milo.json", + "objective_performance_keys": [ + "GFLOP/s" + ] + }, + { + "name": "convolution_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "convolution_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "gemm_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "gemm_milo.json", + "objective_performance_keys": [ + "time" + ] 
+ } + ], + "gpus": [ + "A100", + "A4000", + "MI250X", + "A6000", + "W6600", + "W7800" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" + }, + "stochastic": true, + "repeats": 100, + "samples": 32, + "minimum_fraction_of_budget_valid": 0.1, + "minimum_number_of_valid_search_iterations": 10, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "llamea_alg-dedispersion-LLaMEA-o4-mini", + "search_method": "HybridVNDX", + "display_name": "LLaMEA o4 mini dedispersion", + "autotuner": "KernelTuner", + "custom_search_method_path": "../llamea_gen_algs/alg-dedispersion-LLaMEA-o4-mini.py", + "color_index": 1 + }, + { + "name": "llamea_alg-gemm-LLaMEA-o4-mini", + "search_method": "AdaptiveTabuGreyWolf", + "display_name": "LLaMEA o4 mini gemm", + "autotuner": "KernelTuner", + "custom_search_method_path": "../llamea_gen_algs/alg-gemm-LLaMEA-o4-mini.py", + "color_index": 2 + }, + { + "name": "genetic_algorithm_tuned_extensive_6", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "popsize", + "value": 26 + }, + { + "name": "maxiter", + "value": 150 + }, + { + "name": "method", + "value": "single_point" + }, + { + "name": "mutation_chance", + "value": 5 + } + ], + "display_name": "Genetic Algorithm", + "autotuner": "KernelTuner", + "color_index": 4 + }, + { + "name": "simulated_annealing_tuned_extensive", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "T", + "value": 0.1 + }, + { + "name": "T_min", + "value": 0.0001 + }, + { + "name": "alpha", + "value": 0.9975 + }, + { + "name": "maxiter", + "value": 1 + } + ], + "display_name": "Simulated Annealing", + "autotuner": "KernelTuner", + "color_index": 5 + } + ], + "statistics_settings": { + "cutoff_percentile": 0.95, + "cutoff_percentile_start": 0.01, + "cutoff_type": "time", + "objective_time_keys": [ + "all" + ] + }, + "visualization_settings": { + "plots": [ 
+ { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "normalized", + "baseline" + ] + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ], + "cmin": -8.0, + "cnum": 10, + "include_y_labels": true, + "include_colorbar": false, + "divide_train_test_axis": "gpus", + "divide_train_test_after_num": 3, + "annotate": true, + "print_mean_of_columns": true, + "print_mean_of_rows": false + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ], + "cmin": -8.0, + "cnum": 10, + "include_y_labels": false, + "include_colorbar": true, + "divide_train_test_axis": "gpus", + "divide_train_test_after_num": 3, + "annotate": true, + "print_mean_of_columns": true, + "print_mean_of_rows": false + }, + { + "scope": "aggregate", + "style": "line", + "ylabel": "Aggregate performance relative to baseline" + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file diff --git a/experiment_files/nikki_compare.json b/experiment_files/nikki_compare.json deleted file mode 100644 index 4d58791..0000000 --- a/experiment_files/nikki_compare.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "version": "1.2.0", - "name": "Compare Nikki's LLMAEA algorithms", - "parent_folder": "./compare_nikki", - "experimental_groups_defaults": { - "applications": [ - { - "name": "dedispersion_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "dedispersion_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "hotspot_milo", - "folder": 
"../autotuning_methodology/benchmark_hub/kernels", - "input_file": "hotspot_milo.json", - "objective_performance_keys": [ - "GFLOP/s" - ] - }, - { - "name": "convolution_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "convolution_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "gemm_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "gemm_milo.json", - "objective_performance_keys": [ - "time" - ] - } - ], - "gpus": [ - "A100", - "A4000", - "MI250X", - "A6000", - "W6600", - "W7800" - ], - "pattern_for_full_search_space_filenames": { - "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" - }, - "stochastic": true, - "repeats": 100, - "samples": 32, - "minimum_fraction_of_budget_valid": 0.1, - "minimum_number_of_valid_search_iterations": 10, - "ignore_cache": false - }, - "search_strategies": [ - { - "name": "nikki_good_alg_gemm", - "search_method": "AdaptiveSimulatedAnnealing", - "display_name": "Adaptive Simulated Annealing", - "autotuner": "KernelTuner", - "custom_search_method_path": "../nikki_good_alg_gemm.py" - }, - { - "name": "pso", - "search_method": "pso", - "display_name": "PSO", - "autotuner": "KernelTuner" - }, - { - "name": "simulated_annealing", - "search_method": "simulated_annealing", - "display_name": "Simulated Annealing", - "autotuner": "KernelTuner" - } - ], - "statistics_settings": { - "cutoff_percentile": 0.95, - "cutoff_percentile_start": 0.01, - "cutoff_type": "time", - "objective_time_keys": [ - "all" - ] - }, - "visualization_settings": { - "plots": [ - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": [ - "fevals" - ], - "y_axis_value_types": [ - "normalized", - "baseline" - ] - }, - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": [ - "time" - ], - "y_axis_value_types": [ - "normalized", - "baseline" - ] - }, - { - "scope": "aggregate", - "style": "line", - "ylabel": "Aggregate 
performance relative to baseline" - } - ], - "resolution": 1000.0, - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - } -} \ No newline at end of file From 5536c5a4a82fa6f3f14d36e38b8e32baf00b6347 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Tue, 19 Aug 2025 18:33:53 +0200 Subject: [PATCH 226/234] Improved the color index resolution and assertions --- src/autotuning_methodology/visualize_experiments.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/autotuning_methodology/visualize_experiments.py b/src/autotuning_methodology/visualize_experiments.py index b8cf393..fb2cd0e 100755 --- a/src/autotuning_methodology/visualize_experiments.py +++ b/src/autotuning_methodology/visualize_experiments.py @@ -79,6 +79,8 @@ def get_colors(strategies: list[dict]) -> list: if override_index: assert "color_index" in strategy, f"All strategies, including '{name}', must have either 'color_index' or 'color_parent' if 'color_index' is used anywhere." color_index = strategy["color_index"] + if color_index >= len(tab10): + raise ValueError(f"Color index {color_index} for strategy '{name}' is out of bounds for tab10 colormap (max {len(tab10) - 1})") base_color = tab10[color_index] parent_colors[name] = { idx: lighten_color(base_color, amount=0.4 + 0.3 * j) for j, idx in enumerate(children_indices) @@ -90,6 +92,9 @@ def get_colors(strategies: list[dict]) -> list: assert parent in parent_colors, f"Parent '{parent}' for strategy '{name}' not found in parent colors - child strategies must be defined after their parents." colors[i] = parent_colors[parent][i] else: + if override_index: + assert "color_index" in strategy, f"All strategies, including '{name}', must have either 'color_index' or 'color_parent' if 'color_index' is used anywhere." 
+ color_index = strategy["color_index"] if color_index >= len(tab10): raise ValueError("Too many unparented strategies for tab10 colormap") colors[i] = to_hex(tab10[color_index]) From 5543e8a8f8a1a3ac18f6c2b004f4459917e1e014 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Wed, 3 Sep 2025 09:31:36 +0200 Subject: [PATCH 227/234] Updated experiment files for upcoming paper --- experiment_files/diff_evo.json | 195 ++++++++++++++++++++++++++ experiment_files/niki_compare.json | 16 +-- experiment_files/niki_compare_kt.json | 21 ++- 3 files changed, 222 insertions(+), 10 deletions(-) create mode 100644 experiment_files/diff_evo.json diff --git a/experiment_files/diff_evo.json b/experiment_files/diff_evo.json new file mode 100644 index 0000000..d67dbf5 --- /dev/null +++ b/experiment_files/diff_evo.json @@ -0,0 +1,195 @@ +{ + "version": "1.2.0", + "name": "Compare Differential Evolution strategies Kernel Tuner", + "parent_folder": "/var/scratch/fjwillem/constrained_optimization", + "experimental_groups_defaults": { + "applications": [ + { + "name": "dedispersion_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "dedispersion_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "hotspot_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "hotspot_milo.json", + "objective_performance_keys": [ + "GFLOP/s" + ] + }, + { + "name": "convolution_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "convolution_milo.json", + "objective_performance_keys": [ + "time" + ] + }, + { + "name": "gemm_milo", + "folder": "../autotuning_methodology/benchmark_hub/kernels", + "input_file": "gemm_milo.json", + "objective_performance_keys": [ + "time" + ] + } + ], + "gpus": [ + "A100", + "A4000", + "MI250X", + "A6000", + "W6600", + "W7800" + ], + "pattern_for_full_search_space_filenames": { + "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" + }, + 
"stochastic": true, + "repeats": 100, + "samples": 32, + "minimum_fraction_of_budget_valid": 0.1, + "minimum_number_of_valid_search_iterations": 10, + "ignore_cache": false + }, + "search_strategies": [ + { + "name": "firefly_constrained", + "search_method": "firefly_algorithm", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "Firefly constrained", + "autotuner": "KernelTuner" + }, + { + "name": "genetic_algorithm_constrained", + "search_method": "genetic_algorithm", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "GA constrained", + "autotuner": "KernelTuner" + }, + { + "name": "pso_constrained", + "search_method": "pso", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "PSO constrained", + "autotuner": "KernelTuner" + }, + { + "name": "simulated_annealing_constrained", + "search_method": "simulated_annealing", + "search_method_hyperparameters": [ + { + "name": "constraint_aware", + "value": true + } + ], + "display_name": "SA constrained", + "autotuner": "KernelTuner" + }, + { + "name": "diff_evo", + "search_method": "diff_evo", + "display_name": "Diff Evolution", + "autotuner": "KernelTuner" + } + ], + "statistics_settings": { + "cutoff_percentile": 0.95, + "cutoff_percentile_start": 0.01, + "cutoff_type": "time", + "objective_time_keys": [ + "all" + ] + }, + "visualization_settings": { + "plots": [ + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "fevals" + ], + "y_axis_value_types": [ + "absolute", + "baseline" + ] + }, + { + "scope": "searchspace", + "style": "line", + "x_axis_value_types": [ + "time" + ], + "y_axis_value_types": [ + "absolute", + "baseline" + ] + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ], + "cmin": -8.0, + 
"include_y_labels": true, + "include_colorbar": false, + "annotate": true + }, + { + "scope": "search_strategy", + "style": "heatmap", + "x_axis_value_types": [ + "applications" + ], + "y_axis_value_types": [ + "gpus" + ], + "cmin": -8.0, + "cnum": 10, + "include_y_labels": false, + "include_colorbar": true, + "annotate": true + }, + { + "scope": "aggregate", + "style": "head2head", + "comparison": { + "unit": "time", + "relative_time": 0.5 + }, + "annotate": true + }, + { + "scope": "aggregate", + "style": "line", + "ylabel": "Aggregate performance relative to baseline" + } + ], + "resolution": 1000.0, + "confidence_level": 0.95, + "compare_baselines": false, + "compare_split_times": false + } +} \ No newline at end of file diff --git a/experiment_files/niki_compare.json b/experiment_files/niki_compare.json index 3a14948..3e4d7a2 100644 --- a/experiment_files/niki_compare.json +++ b/experiment_files/niki_compare.json @@ -59,14 +59,14 @@ { "name": "llamea_alg-convolution-LLaMEA-o4-mini", "search_method": "PRTS", - "display_name": "LLaMEA o4 mini convolution", + "display_name": "LLaMEA o4 mini convolution extra info", "autotuner": "KernelTuner", "custom_search_method_path": "../llamea_gen_algs/alg-convolution-LLaMEA-o4-mini.py" }, { "name": "llamea_alg-convolution-no-info-LLaMEA-o4-mini", "search_method": "TabuHarmonySearch", - "display_name": "LLaMEA o4 mini convolution no info", + "display_name": "LLaMEA o4 mini convolution", "autotuner": "KernelTuner", "custom_search_method_path": "../llamea_gen_algs/alg-convolution-no-info-LLaMEA-o4-mini.py", "color_parent": "llamea_alg-convolution-LLaMEA-o4-mini" @@ -74,14 +74,14 @@ { "name": "llamea_alg-dedispersion-LLaMEA-o4-mini", "search_method": "HybridVNDX", - "display_name": "LLaMEA o4 mini dedispersion", + "display_name": "LLaMEA o4 mini dedispersion extra info", "autotuner": "KernelTuner", "custom_search_method_path": "../llamea_gen_algs/alg-dedispersion-LLaMEA-o4-mini.py" }, { "name": 
"llamea_alg-dedispersion-no-info-LLaMEA-o4-mini", "search_method": "AdaptiveBanditNeighborhoodSearch", - "display_name": "LLaMEA o4 mini dedispersion no info", + "display_name": "LLaMEA o4 mini dedispersion", "autotuner": "KernelTuner", "custom_search_method_path": "../llamea_gen_algs/alg-dedispersion-no-info-LLaMEA-o4-mini.py", "color_parent": "llamea_alg-dedispersion-LLaMEA-o4-mini" @@ -89,14 +89,14 @@ { "name": "llamea_alg-gemm-LLaMEA-o4-mini", "search_method": "AdaptiveTabuGreyWolf", - "display_name": "LLaMEA o4 mini gemm", + "display_name": "LLaMEA o4 mini gemm extra info", "autotuner": "KernelTuner", "custom_search_method_path": "../llamea_gen_algs/alg-gemm-LLaMEA-o4-mini.py" }, { "name": "llamea_alg-gemm-no-info-LLaMEA-o4-mini", "search_method": "HierarchicalBanditVNS", - "display_name": "LLaMEA o4 mini gemm no info", + "display_name": "LLaMEA o4 mini gemm", "autotuner": "KernelTuner", "custom_search_method_path": "../llamea_gen_algs/alg-gemm-no-info-LLaMEA-o4-mini.py", "color_parent": "llamea_alg-gemm-LLaMEA-o4-mini" @@ -104,14 +104,14 @@ { "name": "llamea_alg-hotspot-LLaMEA-o4-mini", "search_method": "ThompsonVNS", - "display_name": "LLaMEA o4 mini hotspot", + "display_name": "LLaMEA o4 mini hotspot extra info", "autotuner": "KernelTuner", "custom_search_method_path": "../llamea_gen_algs/alg-hotspot-LLaMEA-o4-mini.py" }, { "name": "llamea_alg-hotspot-no-info-LLaMEA-o4-mini", "search_method": "AdaptiveLevySA", - "display_name": "LLaMEA o4 mini hotspot no info", + "display_name": "LLaMEA o4 mini hotspot", "autotuner": "KernelTuner", "custom_search_method_path": "../llamea_gen_algs/alg-hotspot-no-info-LLaMEA-o4-mini.py", "color_parent": "llamea_alg-hotspot-LLaMEA-o4-mini" diff --git a/experiment_files/niki_compare_kt.json b/experiment_files/niki_compare_kt.json index a1afa9c..0246a1e 100644 --- a/experiment_files/niki_compare_kt.json +++ b/experiment_files/niki_compare_kt.json @@ -72,6 +72,23 @@ "custom_search_method_path": 
"../llamea_gen_algs/alg-gemm-LLaMEA-o4-mini.py", "color_index": 2 }, + { + "name": "pyatf_differential_evolution", + "search_method": "pyatf_strategies", + "search_method_hyperparameters": [ + { + "name": "searchtechnique", + "value": "differential_evolution" + }, + { + "name": "use_searchspace_cache", + "value": true + } + ], + "display_name": "pyATF Differential Evolution", + "autotuner": "KernelTuner", + "color_index": 7 + }, { "name": "genetic_algorithm_tuned_extensive_6", "search_method": "genetic_algorithm", @@ -93,7 +110,7 @@ "value": 5 } ], - "display_name": "Genetic Algorithm", + "display_name": "Kernel Tuner Genetic Algorithm", "autotuner": "KernelTuner", "color_index": 4 }, @@ -118,7 +135,7 @@ "value": 1 } ], - "display_name": "Simulated Annealing", + "display_name": "Kernel Tuner Simulated Annealing", "autotuner": "KernelTuner", "color_index": 5 } From 60e9e1208e6e7b5e4724056e43721e6db5ffefb3 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Wed, 3 Sep 2025 09:43:33 +0200 Subject: [PATCH 228/234] Updated dependency version --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 338351d..bec9576 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ description = "Software package easing implementation of the guidelines of the 2 keywords = ["autotuning", "auto-tuning", "methodology", "scientific"] readme = "README.md" license = { file = "LICENSE" } -requires-python = ">=3.9" # NOTE when updating python version, also update classifiers and Nox test versions +requires-python = ">=3.10,<4" # NOTE when updating python version, also update classifiers and Nox test versions classifiers = [ "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: MIT License", @@ -31,7 +31,7 @@ dependencies = [ "progressbar2 >= 4.2.0", "jsonschema >= 4.17.3", "nonconformist >= 2.1.0", - "kernel_tuner >= 1.2.0", + "kernel_tuner >= 1.2", ] [project.optional-dependencies] From 
b86d569c255338f176d73d2ad5cfba1795d542cf Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Wed, 3 Sep 2025 09:55:44 +0200 Subject: [PATCH 229/234] Removed publication-specific experiment files to other repository --- .../compare_constrained_strategies_kt.json | 241 ------------ ...re_constrained_strategies_pyatf_cache.json | 279 -------------- ...mpare_constrained_strategies_pyatf_kt.json | 264 ------------- experiment_files/compare_hypertuners.json | 134 ------- .../compare_hypertuners_metastrategy.json | 94 ----- .../compare_hypertuners_milo.json | 198 ---------- .../compare_hypertuners_paper.json | 298 --------------- .../compare_hypertuners_paper_extensive.json | 361 ------------------ ...mpare_hypertuners_paper_heatmaps_left.json | 174 --------- ...pare_hypertuners_paper_heatmaps_right.json | 175 --------- experiment_files/compare_meta_algorithms.json | 99 ----- experiment_files/niki_compare.json | 201 ---------- experiment_files/niki_compare_kt.json | 224 ----------- 13 files changed, 2742 deletions(-) delete mode 100644 experiment_files/compare_constrained_strategies_kt.json delete mode 100644 experiment_files/compare_constrained_strategies_pyatf_cache.json delete mode 100644 experiment_files/compare_constrained_strategies_pyatf_kt.json delete mode 100644 experiment_files/compare_hypertuners.json delete mode 100644 experiment_files/compare_hypertuners_metastrategy.json delete mode 100644 experiment_files/compare_hypertuners_milo.json delete mode 100644 experiment_files/compare_hypertuners_paper.json delete mode 100644 experiment_files/compare_hypertuners_paper_extensive.json delete mode 100644 experiment_files/compare_hypertuners_paper_heatmaps_left.json delete mode 100644 experiment_files/compare_hypertuners_paper_heatmaps_right.json delete mode 100644 experiment_files/compare_meta_algorithms.json delete mode 100644 experiment_files/niki_compare.json delete mode 100644 experiment_files/niki_compare_kt.json diff --git 
a/experiment_files/compare_constrained_strategies_kt.json b/experiment_files/compare_constrained_strategies_kt.json deleted file mode 100644 index c2658f5..0000000 --- a/experiment_files/compare_constrained_strategies_kt.json +++ /dev/null @@ -1,241 +0,0 @@ -{ - "version": "1.2.0", - "name": "Compare constrained strategies Kernel Tuner", - "parent_folder": "/var/scratch/fjwillem/constrained_optimization", - "experimental_groups_defaults": { - "applications": [ - { - "name": "dedispersion_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "dedispersion_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "hotspot_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "hotspot_milo.json", - "objective_performance_keys": [ - "GFLOP/s" - ] - }, - { - "name": "convolution_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "convolution_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "gemm_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "gemm_milo.json", - "objective_performance_keys": [ - "time" - ] - } - ], - "gpus": [ - "A100", - "A4000", - "MI250X", - "A6000", - "W6600", - "W7800" - ], - "pattern_for_full_search_space_filenames": { - "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" - }, - "stochastic": true, - "repeats": 100, - "samples": 32, - "minimum_fraction_of_budget_valid": 0.1, - "minimum_number_of_valid_search_iterations": 10, - "ignore_cache": false - }, - "search_strategies": [ - { - "name": "firefly_constrained", - "search_method": "firefly_algorithm", - "search_method_hyperparameters": [ - { - "name": "constraint_aware", - "value": true - } - ], - "display_name": "Firefly constrained", - "autotuner": "KernelTuner" - }, - { - "name": "firefly_constrained_non_constrained", - "search_method": "firefly_algorithm", - "search_method_hyperparameters": 
[ - { - "name": "constraint_aware", - "value": false - } - ], - "display_name": "Firefly non-constrained", - "autotuner": "KernelTuner", - "color_parent": "firefly_constrained" - }, - { - "name": "genetic_algorithm_constrained", - "search_method": "genetic_algorithm", - "search_method_hyperparameters": [ - { - "name": "constraint_aware", - "value": true - } - ], - "display_name": "GA constrained", - "autotuner": "KernelTuner" - }, - { - "name": "genetic_algorithm_non_constrained", - "search_method": "genetic_algorithm", - "search_method_hyperparameters": [ - { - "name": "constraint_aware", - "value": false - } - ], - "display_name": "GA non-constrained", - "autotuner": "KernelTuner", - "color_parent": "genetic_algorithm_constrained" - }, - { - "name": "pso_constrained", - "search_method": "pso", - "search_method_hyperparameters": [ - { - "name": "constraint_aware", - "value": true - } - ], - "display_name": "PSO constrained", - "autotuner": "KernelTuner" - }, - { - "name": "pso_non_constrained", - "search_method": "pso", - "search_method_hyperparameters": [ - { - "name": "constraint_aware", - "value": false - } - ], - "display_name": "PSO non-constrained", - "autotuner": "KernelTuner", - "color_parent": "pso_constrained" - }, - { - "name": "simulated_annealing_constrained", - "search_method": "simulated_annealing", - "search_method_hyperparameters": [ - { - "name": "constraint_aware", - "value": true - } - ], - "display_name": "SA constrained", - "autotuner": "KernelTuner" - }, - { - "name": "simulated_annealing_non_constrained_2", - "search_method": "simulated_annealing", - "search_method_hyperparameters": [ - { - "name": "constraint_aware", - "value": false - } - ], - "display_name": "SA non-constrained", - "autotuner": "KernelTuner", - "color_parent": "simulated_annealing_constrained" - } - ], - "statistics_settings": { - "cutoff_percentile": 0.95, - "cutoff_percentile_start": 0.01, - "cutoff_type": "time", - "objective_time_keys": [ - "all" - ] - }, - 
"visualization_settings": { - "plots": [ - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": [ - "fevals" - ], - "y_axis_value_types": [ - "absolute", - "baseline" - ] - }, - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": [ - "time" - ], - "y_axis_value_types": [ - "absolute", - "baseline" - ] - }, - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "applications" - ], - "y_axis_value_types": [ - "gpus" - ], - "cmin": -8.0, - "include_y_labels": true, - "include_colorbar": false, - "annotate": true - }, - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "applications" - ], - "y_axis_value_types": [ - "gpus" - ], - "cmin": -8.0, - "cnum": 10, - "include_y_labels": false, - "include_colorbar": true, - "annotate": true - }, - { - "scope": "aggregate", - "style": "head2head", - "comparison": { - "unit": "time", - "relative_time": 0.5 - }, - "annotate": true - }, - { - "scope": "aggregate", - "style": "line", - "ylabel": "Aggregate performance relative to baseline" - } - ], - "resolution": 1000.0, - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - } -} \ No newline at end of file diff --git a/experiment_files/compare_constrained_strategies_pyatf_cache.json b/experiment_files/compare_constrained_strategies_pyatf_cache.json deleted file mode 100644 index 09f2e27..0000000 --- a/experiment_files/compare_constrained_strategies_pyatf_cache.json +++ /dev/null @@ -1,279 +0,0 @@ -{ - "version": "1.2.0", - "name": "Compare constrained strategies pyATF cached vs non-cached", - "parent_folder": "/var/scratch/fjwillem/constrained_optimization", - "experimental_groups_defaults": { - "applications": [ - { - "name": "dedispersion_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "dedispersion_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "hotspot_milo", - "folder": 
"../autotuning_methodology/benchmark_hub/kernels", - "input_file": "hotspot_milo.json", - "objective_performance_keys": [ - "GFLOP/s" - ] - }, - { - "name": "convolution_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "convolution_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "gemm_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "gemm_milo.json", - "objective_performance_keys": [ - "time" - ] - } - ], - "gpus": [ - "A100", - "A4000", - "MI250X", - "A6000", - "W6600", - "W7800" - ], - "pattern_for_full_search_space_filenames": { - "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" - }, - "stochastic": true, - "repeats": 100, - "samples": 32, - "minimum_fraction_of_budget_valid": 0.1, - "minimum_number_of_valid_search_iterations": 10, - "ignore_cache": false - }, - "search_strategies": [ - { - "name": "pyatf_simulated_annealing", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "simulated_annealing" - }, - { - "name": "use_searchspace_cache", - "value": true - } - ], - "display_name": "pyATF Simulated Annealing", - "autotuner": "KernelTuner" - }, - { - "name": "pyatf_simulated_annealing_no_cache", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "simulated_annealing" - }, - { - "name": "use_searchspace_cache", - "value": false - } - ], - "display_name": "pyATF Simulated Annealing (no cache)", - "autotuner": "KernelTuner", - "color_parent": "pyatf_simulated_annealing" - }, - { - "name": "pyatf_auc_bandit", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "auc_bandit" - }, - { - "name": "use_searchspace_cache", - "value": true - } - ], - "display_name": "pyATF AUC Bandit", - "autotuner": "KernelTuner" - }, - { - "name": 
"pyatf_auc_bandit_no_cache", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "auc_bandit" - }, - { - "name": "use_searchspace_cache", - "value": false - } - ], - "display_name": "pyATF AUC Bandit (no cache)", - "autotuner": "KernelTuner", - "color_parent": "pyatf_auc_bandit" - }, - { - "name": "pyatf_differential_evolution", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "differential_evolution" - }, - { - "name": "use_searchspace_cache", - "value": true - } - ], - "display_name": "pyATF Differential Evolution", - "autotuner": "KernelTuner" - }, - { - "name": "pyatf_differential_evolution_no_cache", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "differential_evolution" - }, - { - "name": "use_searchspace_cache", - "value": false - } - ], - "display_name": "pyATF Differential Evolution (no cache)", - "autotuner": "KernelTuner", - "color_parent": "pyatf_differential_evolution" - }, - { - "name": "pyatf_pattern_search", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "pattern_search" - }, - { - "name": "use_searchspace_cache", - "value": true - } - ], - "display_name": "pyATF Pattern Search", - "autotuner": "KernelTuner" - }, - { - "name": "pyatf_pattern_search_no_cache", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "pattern_search" - }, - { - "name": "use_searchspace_cache", - "value": false - } - ], - "display_name": "pyATF Pattern Search (no cache)", - "autotuner": "KernelTuner", - "color_parent": "pyatf_pattern_search" - }, - { - "name": "pyatf_round_robin", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "round_robin" - }, - { - 
"name": "use_searchspace_cache", - "value": true - } - ], - "display_name": "pyATF Round Robin", - "autotuner": "KernelTuner" - }, - { - "name": "pyatf_round_robin_no_cache", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "round_robin" - }, - { - "name": "use_searchspace_cache", - "value": false - } - ], - "display_name": "pyATF Round Robin (no cache)", - "autotuner": "KernelTuner", - "color_parent": "pyatf_round_robin" - }, - { - "name": "pyatf_torczon", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "torczon" - }, - { - "name": "use_searchspace_cache", - "value": true - } - ], - "display_name": "pyATF Torczon", - "autotuner": "KernelTuner" - }, - { - "name": "pyatf_torczon_no_cache", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "torczon" - }, - { - "name": "use_searchspace_cache", - "value": false - } - ], - "display_name": "pyATF Torczon (no cache)", - "autotuner": "KernelTuner", - "color_parent": "pyatf_torczon" - } - ], - "statistics_settings": { - "cutoff_percentile": 0.95, - "cutoff_percentile_start": 0.01, - "cutoff_type": "time", - "objective_time_keys": [ - "all" - ] - }, - "visualization_settings": { - "plots": [ - { - "scope": "aggregate", - "style": "line", - "ylabel": "Aggregate performance relative to baseline" - } - ], - "resolution": 1000.0, - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - } -} \ No newline at end of file diff --git a/experiment_files/compare_constrained_strategies_pyatf_kt.json b/experiment_files/compare_constrained_strategies_pyatf_kt.json deleted file mode 100644 index b032289..0000000 --- a/experiment_files/compare_constrained_strategies_pyatf_kt.json +++ /dev/null @@ -1,264 +0,0 @@ -{ - "version": "1.2.0", - "name": "Compare constrained strategies pyATF vs KT", - 
"parent_folder": "/var/scratch/fjwillem/constrained_optimization", - "experimental_groups_defaults": { - "applications": [ - { - "name": "dedispersion_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "dedispersion_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "hotspot_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "hotspot_milo.json", - "objective_performance_keys": [ - "GFLOP/s" - ] - }, - { - "name": "convolution_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "convolution_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "gemm_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "gemm_milo.json", - "objective_performance_keys": [ - "time" - ] - } - ], - "gpus": [ - "A100", - "A4000", - "MI250X", - "A6000", - "W6600", - "W7800" - ], - "pattern_for_full_search_space_filenames": { - "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" - }, - "stochastic": true, - "repeats": 100, - "samples": 32, - "minimum_fraction_of_budget_valid": 0.1, - "minimum_number_of_valid_search_iterations": 10, - "ignore_cache": false - }, - "search_strategies": [ - { - "name": "firefly_constrained", - "search_method": "firefly_algorithm", - "search_method_hyperparameters": [ - { - "name": "constraint_aware", - "value": true - } - ], - "display_name": "KT Firefly", - "autotuner": "KernelTuner" - }, - { - "name": "genetic_algorithm_constrained", - "search_method": "genetic_algorithm", - "search_method_hyperparameters": [ - { - "name": "constraint_aware", - "value": true - } - ], - "display_name": "KT GA", - "autotuner": "KernelTuner" - }, - { - "name": "pso_constrained", - "search_method": "pso", - "search_method_hyperparameters": [ - { - "name": "constraint_aware", - "value": true - } - ], - "display_name": "KT PSO", - "autotuner": "KernelTuner" - }, - { - "name": 
"simulated_annealing_constrained", - "search_method": "simulated_annealing", - "search_method_hyperparameters": [ - { - "name": "constraint_aware", - "value": true - } - ], - "display_name": "KT SA", - "autotuner": "KernelTuner" - }, - { - "name": "pyatf_simulated_annealing", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "simulated_annealing" - }, - { - "name": "use_searchspace_cache", - "value": true - } - ], - "display_name": "pyATF SA", - "autotuner": "KernelTuner" - }, - { - "name": "pyatf_auc_bandit", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "auc_bandit" - }, - { - "name": "use_searchspace_cache", - "value": true - } - ], - "display_name": "pyATF AUC Bandit", - "autotuner": "KernelTuner" - }, - { - "name": "pyatf_differential_evolution", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "differential_evolution" - }, - { - "name": "use_searchspace_cache", - "value": true - } - ], - "display_name": "pyATF DE", - "autotuner": "KernelTuner" - }, - { - "name": "pyatf_pattern_search", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "pattern_search" - }, - { - "name": "use_searchspace_cache", - "value": true - } - ], - "display_name": "pyATF Pattern Search", - "autotuner": "KernelTuner" - }, - { - "name": "pyatf_round_robin", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "round_robin" - }, - { - "name": "use_searchspace_cache", - "value": true - } - ], - "display_name": "pyATF Round Robin", - "autotuner": "KernelTuner" - }, - { - "name": "pyatf_torczon", - "search_method": "pyatf_strategies", - "search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "torczon" - }, - { - "name": 
"use_searchspace_cache", - "value": true - } - ], - "display_name": "pyATF Torczon", - "autotuner": "KernelTuner" - } - ], - "statistics_settings": { - "cutoff_percentile": 0.95, - "cutoff_percentile_start": 0.01, - "cutoff_type": "time", - "objective_time_keys": [ - "all" - ] - }, - "visualization_settings": { - "plots": [ - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "applications" - ], - "y_axis_value_types": [ - "gpus" - ], - "cmin": -8.0, - "include_y_labels": true, - "include_colorbar": false, - "annotate": true - }, - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "applications" - ], - "y_axis_value_types": [ - "gpus" - ], - "cmin": -8.0, - "cnum": 10, - "include_y_labels": false, - "include_colorbar": true, - "annotate": true - }, - { - "scope": "aggregate", - "style": "head2head", - "comparison": { - "unit": "time", - "relative_time": 0.5 - }, - "annotate": true - }, - { - "scope": "aggregate", - "style": "line", - "ylabel": "Aggregate performance relative to baseline", - "vmin": -4.0 - } - ], - "resolution": 1000.0, - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - } -} \ No newline at end of file diff --git a/experiment_files/compare_hypertuners.json b/experiment_files/compare_hypertuners.json deleted file mode 100644 index e1108af..0000000 --- a/experiment_files/compare_hypertuners.json +++ /dev/null @@ -1,134 +0,0 @@ -{ - "version": "1.1.0", - "name": "Compare hyperparameter tuning", - "parent_folder": "./hyperparametertuning", - "experimental_groups_defaults": { - "applications": [ - { - "name": "convolution", - "folder": "./cached_data_used/kernels", - "input_file": "convolution.json" - }, - { - "name": "pnpoly", - "folder": "./cached_data_used/kernels", - "input_file": "pnpoly.json" - } - ], - "gpus": [ - "RTX_3090", - "RTX_2080_Ti" - ], - "pattern_for_full_search_space_filenames": { - "regex": 
"./cached_data_used/cachefiles/${applications}/${gpus}_T4.json" - }, - "stochastic": true, - "repeats": 50, - "samples": 32, - "minimum_number_of_valid_search_iterations": 20, - "ignore_cache": false - }, - "search_strategies": [ - { - "name": "pso_default", - "search_method": "pso", - "display_name": "PSO default", - "autotuner": "KernelTuner", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 20 - }, - { - "name": "maxiter", - "value": 100 - }, - { - "name": "w", - "value": 0.5 - }, - { - "name": "c1", - "value": 2.0 - }, - { - "name": "c2", - "value": 1.0 - } - ] - }, - { - "name": "pso_tuned", - "search_method": "pso", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 20 - }, - { - "name": "maxiter", - "value": 150 - }, - { - "name": "w", - "value": 0.25 - }, - { - "name": "c1", - "value": 3.0 - }, - { - "name": "c2", - "value": 1.5 - } - ], - "display_name": "PSO tuned", - "autotuner": "KernelTuner" - } - ], - "statistics_settings": { - "minimization": true, - "cutoff_percentile": 0.96, - "cutoff_percentile_start": 0.5, - "cutoff_type": "fevals", - "objective_time_keys": [ - "all" - ], - "objective_performance_keys": [ - "time" - ] - }, - "visualization_settings": { - "plots": [ - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "applications" - ], - "y_axis_value_types": [ - "gpus" - ] - }, - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "time" - ], - "y_axis_value_types": [ - "searchspaces" - ], - "bins": 100 - }, - { - "scope": "aggregate", - "style": "line" - } - ], - "resolution": 1000.0, - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - } -} \ No newline at end of file diff --git a/experiment_files/compare_hypertuners_metastrategy.json b/experiment_files/compare_hypertuners_metastrategy.json deleted file mode 100644 index 62e3923..0000000 --- a/experiment_files/compare_hypertuners_metastrategy.json 
+++ /dev/null @@ -1,94 +0,0 @@ -{ - "version": "1.2.0", - "name": "Compare hyperparameter metastrategies", - "parent_folder": "./hyperparametertuning_metastrategies", - "experimental_groups_defaults": { - "applications": [ - { - "name": "genetic_algorithm", - "folder": "../autotuning_methodology/benchmark_hub/hyperparametertuning/algorithms", - "input_file": "metatuning_genetic_algorithm.json", - "objective_performance_keys": [ - "score" - ] - }, - { - "name": "pso", - "folder": "../autotuning_methodology/benchmark_hub/hyperparametertuning/algorithms", - "input_file": "metatuning_pso.json", - "objective_performance_keys": [ - "score" - ] - }, - { - "name": "simulated_annealing", - "folder": "../autotuning_methodology/benchmark_hub/hyperparametertuning/algorithms", - "input_file": "metatuning_simulated_annealing.json", - "objective_performance_keys": [ - "score" - ] - } - ], - "gpus": [ - "A4000" - ], - "pattern_for_full_search_space_filenames": { - "regex": "./benchmark_hub/hyperparametertuning/cachefiles/hyperparamtuning_paper_bruteforce_${applications}_T4.json" - }, - "stochastic": true, - "repeats": 100, - "samples": 1, - "minimum_fraction_of_budget_valid": 0.1, - "minimum_number_of_valid_search_iterations": 2, - "ignore_cache": false - }, - "search_strategies": [ - { - "name": "meta_dual_annealing", - "search_method": "dual_annealing", - "display_name": "Dual Annealing", - "autotuner": "KernelTuner" - }, - { - "name": "meta_genetic_algorithm", - "search_method": "genetic_algorithm", - "display_name": "Genetic Algorithm", - "autotuner": "KernelTuner" - }, - { - "name": "meta_pso", - "search_method": "pso", - "display_name": "PSO", - "autotuner": "KernelTuner" - }, - { - "name": "meta_simulated_annealing", - "search_method": "simulated_annealing", - "display_name": "Simulated Annealing", - "autotuner": "KernelTuner" - } - ], - "statistics_settings": { - "cutoff_percentile": 0.95, - "cutoff_percentile_start": 0.0, - "cutoff_type": "time", - "objective_time_keys": 
[ - "all" - ] - }, - "visualization_settings": { - "plots": [ - { - "scope": "aggregate", - "style": "line", - "xlabel": "Relative time until the budget is exhausted", - "ylabel": "Aggregate performance relative to baseline", - "tmin": "real" - } - ], - "resolution": 1000000.0, - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - } -} \ No newline at end of file diff --git a/experiment_files/compare_hypertuners_milo.json b/experiment_files/compare_hypertuners_milo.json deleted file mode 100644 index 2025ee3..0000000 --- a/experiment_files/compare_hypertuners_milo.json +++ /dev/null @@ -1,198 +0,0 @@ -{ - "version": "1.1.0", - "name": "Compare hyperparameter tuning", - "parent_folder": "./hyperparametertuning_milo", - "experimental_groups_defaults": { - "applications": [ - { - "name": "dedispersion_milo", - "folder": "../autotuning_methodology/cached_data_used/kernels", - "input_file": "dedispersion_milo.json" - }, - { - "name": "convolution_milo", - "folder": "../autotuning_methodology/cached_data_used/kernels", - "input_file": "convolution_milo.json" - } - ], - "gpus": [ - "A100", - "A4000", - "MI250X" - ], - "pattern_for_full_search_space_filenames": { - "regex": "./cached_data_used/cachefiles/${applications}/${gpus}_T4.json" - }, - "stochastic": true, - "repeats": 50, - "samples": 32, - "minimum_number_of_valid_search_iterations": 10, - "ignore_cache": false - }, - "search_strategies": [ - { - "name": "pso_default", - "search_method": "pso", - "display_name": "PSO default", - "autotuner": "KernelTuner", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 20 - }, - { - "name": "maxiter", - "value": 100 - }, - { - "name": "w", - "value": 0.5 - }, - { - "name": "c1", - "value": 2.0 - }, - { - "name": "c2", - "value": 1.0 - } - ] - }, - { - "name": "greedy_ils_default", - "search_method": "greedy_ils", - "search_method_hyperparameters": [ - { - "name": "neighbor", - "value": "Hamming" - }, - { - "name": 
"restart", - "value": true - }, - { - "name": "no_improvement", - "value": 50 - }, - { - "name": "random_walk", - "value": 0.3 - } - ], - "display_name": "Greedy ILS default", - "autotuner": "KernelTuner" - }, - { - "name": "bayes_opt", - "search_method": "bayes_opt", - "display_name": "Bayesian Optimization SciKit", - "autotuner": "KernelTuner" - }, - { - "name": "bayes_opt_botorch_3", - "search_method": "bayes_opt_BOTorch", - "display_name": "Bayesian Optimization BOTorch", - "autotuner": "KernelTuner", - "color_parent": "bayes_opt" - }, - { - "name": "bayes_opt_botorch_transfer_weighted", - "search_method": "bayes_opt_BOTorch_transfer_weighted", - "display_name": "Bayesian Optimization BOTorch TL", - "autotuner": "KernelTuner", - "color_parent": "bayes_opt" - }, - { - "name": "genetic_algorithm", - "search_method": "genetic_algorithm", - "display_name": "Genetic Algorithm", - "autotuner": "KernelTuner" - }, - { - "name": "dual_annealing", - "search_method": "dual_annealing", - "display_name": "Dual Annealing default", - "autotuner": "KernelTuner" - }, - { - "name": "dual_annealing_tuned", - "search_method": "dual_annealing", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "CG" - } - ], - "display_name": "Dual Annealing tuned", - "autotuner": "KernelTuner", - "color_parent": "dual_annealing" - } - ], - "statistics_settings": { - "minimization": true, - "cutoff_percentile": 0.96, - "cutoff_percentile_start": 0.5, - "cutoff_type": "fevals", - "objective_time_keys": [ - "all" - ], - "objective_performance_keys": [ - "time" - ] - }, - "visualization_settings": { - "plots": [ - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": [ - "fevals" - ], - "y_axis_value_types": [ - "normalized", - "baseline" - ] - }, - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": [ - "time" - ], - "y_axis_value_types": [ - "normalized", - "baseline" - ] - }, - { - "scope": "search_strategy", - "style": "heatmap", - 
"x_axis_value_types": [ - "applications" - ], - "y_axis_value_types": [ - "gpus" - ] - }, - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "time" - ], - "y_axis_value_types": [ - "searchspaces" - ], - "bins": 100 - }, - { - "scope": "aggregate", - "style": "line" - } - ], - "resolution": 1000.0, - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - } -} \ No newline at end of file diff --git a/experiment_files/compare_hypertuners_paper.json b/experiment_files/compare_hypertuners_paper.json deleted file mode 100644 index 53d9e61..0000000 --- a/experiment_files/compare_hypertuners_paper.json +++ /dev/null @@ -1,298 +0,0 @@ -{ - "version": "1.2.0", - "name": "Compare hyperparameter tuning", - "parent_folder": "./hyperparametertuning_milo", - "experimental_groups_defaults": { - "applications": [ - { - "name": "dedispersion_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "dedispersion_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "hotspot_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "hotspot_milo.json", - "objective_performance_keys": [ - "GFLOP/s" - ] - }, - { - "name": "convolution_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "convolution_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "gemm_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "gemm_milo.json", - "objective_performance_keys": [ - "time" - ] - } - ], - "gpus": [ - "A100", - "A4000", - "MI250X", - "A6000", - "W6600", - "W7800" - ], - "pattern_for_full_search_space_filenames": { - "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" - }, - "stochastic": true, - "repeats": 100, - "samples": 32, - "minimum_fraction_of_budget_valid": 0.1, - "minimum_number_of_valid_search_iterations": 10, - "ignore_cache": 
false - }, - "search_strategies": [ - { - "name": "dual_annealing_tuned", - "search_method": "dual_annealing", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "COBYLA" - } - ], - "display_name": "Dual Annealing optimal", - "autotuner": "KernelTuner" - }, - { - "name": "dual_annealing_mean", - "search_method": "dual_annealing", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "trust-constr" - } - ], - "display_name": "Dual Annealing mean", - "autotuner": "KernelTuner", - "color_parent": "dual_annealing_tuned" - }, - { - "name": "genetic_algorithm_tuned", - "search_method": "genetic_algorithm", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 20 - }, - { - "name": "maxiter", - "value": 150 - }, - { - "name": "method", - "value": "single_point" - }, - { - "name": "mutation_chance", - "value": 5 - } - ], - "display_name": "Genetic Algorithm optimal", - "autotuner": "KernelTuner" - }, - { - "name": "genetic_algorithm_mean", - "search_method": "genetic_algorithm", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 20 - }, - { - "name": "maxiter", - "value": 50 - }, - { - "name": "method", - "value": "disruptive_uniform" - }, - { - "name": "mutation_chance", - "value": 20 - } - ], - "display_name": "Genetic Algorithm mean", - "autotuner": "KernelTuner", - "color_parent": "genetic_algorithm_tuned" - }, - { - "name": "pso_tuned", - "search_method": "pso", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 30 - }, - { - "name": "maxiter", - "value": 100 - }, - { - "name": "c1", - "value": 3.0 - }, - { - "name": "c2", - "value": 0.5 - } - ], - "display_name": "PSO optimal", - "autotuner": "KernelTuner" - }, - { - "name": "pso_mean", - "search_method": "pso", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 20 - }, - { - "name": "maxiter", - "value": 50 - }, - { - "name": "c1", - "value": 1.0 - }, - { - "name": "c2", - "value": 1.0 - } - ], 
- "display_name": "PSO mean", - "autotuner": "KernelTuner", - "color_parent": "pso_tuned" - }, - { - "name": "simulated_annealing_tuned", - "search_method": "simulated_annealing", - "search_method_hyperparameters": [ - { - "name": "T", - "value": 0.5 - }, - { - "name": "T_min", - "value": 0.001 - }, - { - "name": "alpha", - "value": 0.9975 - }, - { - "name": "maxiter", - "value": 1 - } - ], - "display_name": "Simulated Annealing optimal", - "autotuner": "KernelTuner" - }, - { - "name": "simulated_annealing_mean", - "search_method": "simulated_annealing", - "search_method_hyperparameters": [ - { - "name": "T", - "value": 1.0 - }, - { - "name": "T_min", - "value": 0.001 - }, - { - "name": "alpha", - "value": 0.995 - }, - { - "name": "maxiter", - "value": 2 - } - ], - "display_name": "Simulated Annealing mean", - "autotuner": "KernelTuner", - "color_parent": "simulated_annealing_tuned" - } - ], - "statistics_settings": { - "cutoff_percentile": 0.95, - "cutoff_percentile_start": 0.01, - "cutoff_type": "time", - "objective_time_keys": [ - "all" - ] - }, - "visualization_settings": { - "plots": [ - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": [ - "fevals" - ], - "y_axis_value_types": [ - "normalized", - "baseline" - ] - }, - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": [ - "time" - ], - "y_axis_value_types": [ - "normalized", - "baseline" - ] - }, - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "time" - ], - "y_axis_value_types": [ - "searchspaces" - ], - "bins": 100 - }, - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "applications" - ], - "y_axis_value_types": [ - "gpus" - ], - "divide_train_test_axis": "gpus", - "divide_train_test_after_num": 3 - }, - { - "scope": "aggregate", - "style": "line", - "ylabel": "Aggregate performance relative to baseline" - } - ], - "resolution": 1000.0, - "confidence_level": 0.95, - "compare_baselines": false, - 
"compare_split_times": false - } -} \ No newline at end of file diff --git a/experiment_files/compare_hypertuners_paper_extensive.json b/experiment_files/compare_hypertuners_paper_extensive.json deleted file mode 100644 index 1fd39ee..0000000 --- a/experiment_files/compare_hypertuners_paper_extensive.json +++ /dev/null @@ -1,361 +0,0 @@ -{ - "version": "1.2.0", - "name": "Compare hyperparameter tuning extensive", - "parent_folder": "/var/scratch/fjwillem/hyperparametertuning_milo", - "experimental_groups_defaults": { - "applications": [ - { - "name": "dedispersion_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "dedispersion_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "hotspot_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "hotspot_milo.json", - "objective_performance_keys": [ - "GFLOP/s" - ] - }, - { - "name": "convolution_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "convolution_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "gemm_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "gemm_milo.json", - "objective_performance_keys": [ - "time" - ] - } - ], - "gpus": [ - "A100", - "A4000", - "MI250X", - "A6000", - "W6600", - "W7800" - ], - "pattern_for_full_search_space_filenames": { - "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" - }, - "stochastic": true, - "repeats": 100, - "samples": 32, - "minimum_fraction_of_budget_valid": 0.1, - "minimum_number_of_valid_search_iterations": 10, - "ignore_cache": false - }, - "search_strategies": [ - { - "name": "genetic_algorithm_tuned_extensive_6", - "search_method": "genetic_algorithm", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 26 - }, - { - "name": "maxiter", - "value": 150 - }, - { - "name": "method", - "value": "single_point" - }, - { - "name": 
"mutation_chance", - "value": 5 - } - ], - "display_name": "Genetic Algorithm extensive", - "autotuner": "KernelTuner", - "color_index": 1 - }, - { - "name": "genetic_algorithm_tuned", - "search_method": "genetic_algorithm", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 20 - }, - { - "name": "maxiter", - "value": 150 - }, - { - "name": "method", - "value": "single_point" - }, - { - "name": "mutation_chance", - "value": 5 - } - ], - "display_name": "Genetic Algorithm limited", - "autotuner": "KernelTuner", - "color_parent": "genetic_algorithm_tuned_extensive_6" - }, - { - "name": "genetic_algorithm_mean", - "search_method": "genetic_algorithm", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 20 - }, - { - "name": "maxiter", - "value": 50 - }, - { - "name": "method", - "value": "disruptive_uniform" - }, - { - "name": "mutation_chance", - "value": 20 - } - ], - "display_name": "Genetic Algorithm mean", - "autotuner": "KernelTuner", - "color_parent": "genetic_algorithm_tuned_extensive_6" - }, - { - "name": "pso_tuned_extensive", - "search_method": "pso", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 50 - }, - { - "name": "maxiter", - "value": 190 - }, - { - "name": "c1", - "value": 3.5 - }, - { - "name": "c2", - "value": 1.0 - } - ], - "display_name": "PSO extensive", - "autotuner": "KernelTuner", - "color_index": 2 - }, - { - "name": "pso_tuned", - "search_method": "pso", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 30 - }, - { - "name": "maxiter", - "value": 100 - }, - { - "name": "c1", - "value": 3.0 - }, - { - "name": "c2", - "value": 0.5 - } - ], - "display_name": "PSO limited", - "autotuner": "KernelTuner", - "color_parent": "pso_tuned_extensive" - }, - { - "name": "pso_mean", - "search_method": "pso", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 20 - }, - { - "name": "maxiter", - "value": 50 - }, - { - "name": "c1", - "value": 
1.0 - }, - { - "name": "c2", - "value": 1.0 - } - ], - "display_name": "PSO mean", - "autotuner": "KernelTuner", - "color_parent": "pso_tuned_extensive" - }, - { - "name": "simulated_annealing_tuned_extensive", - "search_method": "simulated_annealing", - "search_method_hyperparameters": [ - { - "name": "T", - "value": 0.1 - }, - { - "name": "T_min", - "value": 0.0001 - }, - { - "name": "alpha", - "value": 0.9975 - }, - { - "name": "maxiter", - "value": 1 - } - ], - "display_name": "Simulated Annealing extensive", - "autotuner": "KernelTuner", - "color_index": 3 - }, - { - "name": "simulated_annealing_tuned", - "search_method": "simulated_annealing", - "search_method_hyperparameters": [ - { - "name": "T", - "value": 0.5 - }, - { - "name": "T_min", - "value": 0.001 - }, - { - "name": "alpha", - "value": 0.9975 - }, - { - "name": "maxiter", - "value": 1 - } - ], - "display_name": "Simulated Annealing limited", - "autotuner": "KernelTuner", - "color_parent": "simulated_annealing_tuned_extensive" - }, - { - "name": "simulated_annealing_mean", - "search_method": "simulated_annealing", - "search_method_hyperparameters": [ - { - "name": "T", - "value": 1.0 - }, - { - "name": "T_min", - "value": 0.001 - }, - { - "name": "alpha", - "value": 0.995 - }, - { - "name": "maxiter", - "value": 2 - } - ], - "display_name": "Simulated Annealing mean", - "autotuner": "KernelTuner", - "color_parent": "simulated_annealing_tuned_extensive" - } - ], - "statistics_settings": { - "cutoff_percentile": 0.95, - "cutoff_percentile_start": 0.01, - "cutoff_type": "time", - "objective_time_keys": [ - "all" - ] - }, - "visualization_settings": { - "plots": [ - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": [ - "fevals" - ], - "y_axis_value_types": [ - "normalized", - "baseline" - ] - }, - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": [ - "time" - ], - "y_axis_value_types": [ - "normalized", - "baseline" - ] - }, - { - "scope": "search_strategy", - 
"style": "heatmap", - "x_axis_value_types": [ - "applications" - ], - "y_axis_value_types": [ - "gpus" - ], - "cmin": -6.0, - "include_y_labels": true, - "include_colorbar": false, - "divide_train_test_axis": "gpus", - "divide_train_test_after_num": 3, - "annotate": false - }, - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "applications" - ], - "y_axis_value_types": [ - "gpus" - ], - "cmin": -6.0, - "cnum": 8, - "include_y_labels": false, - "include_colorbar": true, - "divide_train_test_axis": "gpus", - "divide_train_test_after_num": 3, - "annotate": false - }, - { - "scope": "aggregate", - "style": "line", - "ylabel": "Aggregate performance relative to baseline" - } - ], - "resolution": 1000.0, - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - } -} \ No newline at end of file diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_left.json b/experiment_files/compare_hypertuners_paper_heatmaps_left.json deleted file mode 100644 index 900d63c..0000000 --- a/experiment_files/compare_hypertuners_paper_heatmaps_left.json +++ /dev/null @@ -1,174 +0,0 @@ -{ - "version": "1.2.0", - "name": "Compare hyperparameter tuning", - "parent_folder": "./hyperparametertuning_milo", - "experimental_groups_defaults": { - "applications": [ - { - "name": "dedispersion_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "dedispersion_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "hotspot_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "hotspot_milo.json", - "objective_performance_keys": [ - "GFLOP/s" - ] - }, - { - "name": "convolution_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "convolution_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "gemm_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": 
"gemm_milo.json", - "objective_performance_keys": [ - "time" - ] - } - ], - "gpus": [ - "A100", - "A4000", - "MI250X", - "A6000", - "W6600", - "W7800" - ], - "pattern_for_full_search_space_filenames": { - "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" - }, - "stochastic": true, - "repeats": 100, - "samples": 32, - "minimum_fraction_of_budget_valid": 0.1, - "ignore_cache": false - }, - "search_strategies": [ - { - "name": "dual_annealing_inv_tuned", - "search_method": "dual_annealing", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "Nelder-Mead" - } - ], - "display_name": "Dual Annealing untuned", - "autotuner": "KernelTuner" - }, - { - "name": "genetic_algorithm_inv_tuned", - "search_method": "genetic_algorithm", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 10 - }, - { - "name": "maxiter", - "value": 50 - }, - { - "name": "method", - "value": "single_point" - }, - { - "name": "mutation_chance", - "value": 10 - } - ], - "display_name": "Genetic Algorithm untuned", - "autotuner": "KernelTuner" - }, - { - "name": "pso_inv_tuned", - "search_method": "pso", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 10 - }, - { - "name": "maxiter", - "value": 50 - }, - { - "name": "c1", - "value": 1.0 - }, - { - "name": "c2", - "value": 1.5 - } - ], - "display_name": "PSO untuned", - "autotuner": "KernelTuner" - }, - { - "name": "simulated_annealing_inv_tuned", - "search_method": "simulated_annealing", - "search_method_hyperparameters": [ - { - "name": "T", - "value": 1.0 - }, - { - "name": "T_min", - "value": 0.01 - }, - { - "name": "alpha", - "value": 0.9925 - }, - { - "name": "maxiter", - "value": 1 - } - ], - "display_name": "Simulated Annealing untuned", - "autotuner": "KernelTuner" - } - ], - "statistics_settings": { - "cutoff_percentile": 0.95, - "cutoff_percentile_start": 0.01, - "cutoff_type": "time", - "objective_time_keys": [ - "all" - ] - }, - "visualization_settings": { 
- "plots": [ - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "applications" - ], - "y_axis_value_types": [ - "gpus" - ], - "cmin": -6.0, - "include_y_labels": true, - "include_colorbar": false, - "divide_train_test_axis": "gpus", - "divide_train_test_after_num": 3 - } - ], - "resolution": 1000.0, - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - } -} \ No newline at end of file diff --git a/experiment_files/compare_hypertuners_paper_heatmaps_right.json b/experiment_files/compare_hypertuners_paper_heatmaps_right.json deleted file mode 100644 index 59d94b9..0000000 --- a/experiment_files/compare_hypertuners_paper_heatmaps_right.json +++ /dev/null @@ -1,175 +0,0 @@ -{ - "version": "1.2.0", - "name": "Compare hyperparameter tuning", - "parent_folder": "./hyperparametertuning_milo", - "experimental_groups_defaults": { - "applications": [ - { - "name": "dedispersion_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "dedispersion_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "hotspot_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "hotspot_milo.json", - "objective_performance_keys": [ - "GFLOP/s" - ] - }, - { - "name": "convolution_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "convolution_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "gemm_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "gemm_milo.json", - "objective_performance_keys": [ - "time" - ] - } - ], - "gpus": [ - "A100", - "A4000", - "MI250X", - "A6000", - "W6600", - "W7800" - ], - "pattern_for_full_search_space_filenames": { - "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" - }, - "stochastic": true, - "repeats": 100, - "samples": 32, - "minimum_fraction_of_budget_valid": 0.1, - "ignore_cache": false 
- }, - "search_strategies": [ - { - "name": "dual_annealing_tuned", - "search_method": "dual_annealing", - "search_method_hyperparameters": [ - { - "name": "method", - "value": "COBYLA" - } - ], - "display_name": "Dual Annealing tuned", - "autotuner": "KernelTuner" - }, - { - "name": "genetic_algorithm_tuned", - "search_method": "genetic_algorithm", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 20 - }, - { - "name": "maxiter", - "value": 150 - }, - { - "name": "method", - "value": "single_point" - }, - { - "name": "mutation_chance", - "value": 5 - } - ], - "display_name": "Genetic Algorithm tuned", - "autotuner": "KernelTuner" - }, - { - "name": "pso_tuned", - "search_method": "pso", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 30 - }, - { - "name": "maxiter", - "value": 100 - }, - { - "name": "c1", - "value": 3.0 - }, - { - "name": "c2", - "value": 0.5 - } - ], - "display_name": "PSO tuned", - "autotuner": "KernelTuner" - }, - { - "name": "simulated_annealing_tuned", - "search_method": "simulated_annealing", - "search_method_hyperparameters": [ - { - "name": "T", - "value": 0.5 - }, - { - "name": "T_min", - "value": 0.001 - }, - { - "name": "alpha", - "value": 0.9975 - }, - { - "name": "maxiter", - "value": 1 - } - ], - "display_name": "Simulated Annealing tuned", - "autotuner": "KernelTuner" - } - ], - "statistics_settings": { - "cutoff_percentile": 0.95, - "cutoff_percentile_start": 0.01, - "cutoff_type": "time", - "objective_time_keys": [ - "all" - ] - }, - "visualization_settings": { - "plots": [ - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "applications" - ], - "y_axis_value_types": [ - "gpus" - ], - "cmin": -6.0, - "cnum": 8, - "include_y_labels": false, - "include_colorbar": true, - "divide_train_test_axis": "gpus", - "divide_train_test_after_num": 3 - } - ], - "resolution": 1000.0, - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": 
false - } -} \ No newline at end of file diff --git a/experiment_files/compare_meta_algorithms.json b/experiment_files/compare_meta_algorithms.json deleted file mode 100644 index b24c23f..0000000 --- a/experiment_files/compare_meta_algorithms.json +++ /dev/null @@ -1,99 +0,0 @@ -{ - "version": "1.2.0", - "name": "Compare hyperparameter tuning meta algorithms", - "parent_folder": "./hyperparametertuning_meta", - "experimental_groups_defaults": { - "applications": [ - { - "name": "hyperparamtuning_pso", - "folder": "../autotuning_methodology/cached_data_used/kernels", - "input_file": "hyperparamtuning_pso.json" - } - ], - "gpus": [ - "arm" - ], - "pattern_for_full_search_space_filenames": { - "regex": "./cached_data_used/cachefiles/${applications}/${gpus}_T4.json" - }, - "stochastic": true, - "repeats": 50, - "samples": 32, - "minimum_fraction_of_budget_valid": 0.5, - "ignore_cache": false - }, - "search_strategies": [ - { - "name": "pso", - "search_method": "pso", - "display_name": "PSO", - "autotuner": "KernelTuner" - }, - { - "name": "greedy_ils", - "search_method": "greedy_ils", - "display_name": "Greedy ILS", - "autotuner": "KernelTuner" - }, - { - "name": "genetic_algorithm", - "search_method": "genetic_algorithm", - "display_name": "Genetic Algorithm", - "autotuner": "KernelTuner" - }, - { - "name": "dual_annealing", - "search_method": "dual_annealing", - "display_name": "Dual Annealing", - "autotuner": "KernelTuner" - } - ], - "statistics_settings": { - "minimization": false, - "cutoff_percentile": 0.96, - "cutoff_percentile_start": 0.5, - "cutoff_type": "fevals", - "objective_time_keys": [ - "compilation", - "framework", - "search_algorithm", - "validation" - ], - "objective_performance_keys": [ - "score" - ] - }, - "visualization_settings": { - "plots": [ - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "applications" - ], - "y_axis_value_types": [ - "gpus" - ] - }, - { - "scope": "search_strategy", - "style": "heatmap", - 
"x_axis_value_types": [ - "searchspaces" - ], - "y_axis_value_types": [ - "time" - ], - "bins": 100 - }, - { - "scope": "aggregate", - "style": "line" - } - ], - "resolution": 1000.0, - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - } -} \ No newline at end of file diff --git a/experiment_files/niki_compare.json b/experiment_files/niki_compare.json deleted file mode 100644 index 3e4d7a2..0000000 --- a/experiment_files/niki_compare.json +++ /dev/null @@ -1,201 +0,0 @@ -{ - "version": "1.2.0", - "name": "Compare Niki's LLMAEA algorithms", - "parent_folder": "/var/scratch/fjwillem/compare_niki", - "experimental_groups_defaults": { - "applications": [ - { - "name": "dedispersion_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "dedispersion_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "hotspot_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "hotspot_milo.json", - "objective_performance_keys": [ - "GFLOP/s" - ] - }, - { - "name": "convolution_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "convolution_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "gemm_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "gemm_milo.json", - "objective_performance_keys": [ - "time" - ] - } - ], - "gpus": [ - "A100", - "A4000", - "MI250X", - "A6000", - "W6600", - "W7800" - ], - "pattern_for_full_search_space_filenames": { - "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" - }, - "stochastic": true, - "repeats": 100, - "samples": 32, - "minimum_fraction_of_budget_valid": 0.1, - "minimum_number_of_valid_search_iterations": 10, - "ignore_cache": false - }, - "search_strategies": [ - { - "name": "llamea_alg-convolution-LLaMEA-o4-mini", - "search_method": "PRTS", - "display_name": "LLaMEA o4 mini convolution extra info", - 
"autotuner": "KernelTuner", - "custom_search_method_path": "../llamea_gen_algs/alg-convolution-LLaMEA-o4-mini.py" - }, - { - "name": "llamea_alg-convolution-no-info-LLaMEA-o4-mini", - "search_method": "TabuHarmonySearch", - "display_name": "LLaMEA o4 mini convolution", - "autotuner": "KernelTuner", - "custom_search_method_path": "../llamea_gen_algs/alg-convolution-no-info-LLaMEA-o4-mini.py", - "color_parent": "llamea_alg-convolution-LLaMEA-o4-mini" - }, - { - "name": "llamea_alg-dedispersion-LLaMEA-o4-mini", - "search_method": "HybridVNDX", - "display_name": "LLaMEA o4 mini dedispersion extra info", - "autotuner": "KernelTuner", - "custom_search_method_path": "../llamea_gen_algs/alg-dedispersion-LLaMEA-o4-mini.py" - }, - { - "name": "llamea_alg-dedispersion-no-info-LLaMEA-o4-mini", - "search_method": "AdaptiveBanditNeighborhoodSearch", - "display_name": "LLaMEA o4 mini dedispersion", - "autotuner": "KernelTuner", - "custom_search_method_path": "../llamea_gen_algs/alg-dedispersion-no-info-LLaMEA-o4-mini.py", - "color_parent": "llamea_alg-dedispersion-LLaMEA-o4-mini" - }, - { - "name": "llamea_alg-gemm-LLaMEA-o4-mini", - "search_method": "AdaptiveTabuGreyWolf", - "display_name": "LLaMEA o4 mini gemm extra info", - "autotuner": "KernelTuner", - "custom_search_method_path": "../llamea_gen_algs/alg-gemm-LLaMEA-o4-mini.py" - }, - { - "name": "llamea_alg-gemm-no-info-LLaMEA-o4-mini", - "search_method": "HierarchicalBanditVNS", - "display_name": "LLaMEA o4 mini gemm", - "autotuner": "KernelTuner", - "custom_search_method_path": "../llamea_gen_algs/alg-gemm-no-info-LLaMEA-o4-mini.py", - "color_parent": "llamea_alg-gemm-LLaMEA-o4-mini" - }, - { - "name": "llamea_alg-hotspot-LLaMEA-o4-mini", - "search_method": "ThompsonVNS", - "display_name": "LLaMEA o4 mini hotspot extra info", - "autotuner": "KernelTuner", - "custom_search_method_path": "../llamea_gen_algs/alg-hotspot-LLaMEA-o4-mini.py" - }, - { - "name": "llamea_alg-hotspot-no-info-LLaMEA-o4-mini", - "search_method": 
"AdaptiveLevySA", - "display_name": "LLaMEA o4 mini hotspot", - "autotuner": "KernelTuner", - "custom_search_method_path": "../llamea_gen_algs/alg-hotspot-no-info-LLaMEA-o4-mini.py", - "color_parent": "llamea_alg-hotspot-LLaMEA-o4-mini" - } - ], - "statistics_settings": { - "cutoff_percentile": 0.95, - "cutoff_percentile_start": 0.01, - "cutoff_type": "time", - "objective_time_keys": [ - "all" - ] - }, - "visualization_settings": { - "plots": [ - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": [ - "fevals" - ], - "y_axis_value_types": [ - "normalized", - "baseline" - ] - }, - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": [ - "time" - ], - "y_axis_value_types": [ - "normalized", - "baseline" - ] - }, - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "applications" - ], - "y_axis_value_types": [ - "gpus" - ], - "cmin": -8.0, - "cnum": 10, - "include_y_labels": true, - "include_colorbar": false, - "divide_train_test_axis": "gpus", - "divide_train_test_after_num": 3, - "annotate": true, - "print_mean_of_columns": true, - "print_mean_of_rows": false - }, - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "applications" - ], - "y_axis_value_types": [ - "gpus" - ], - "cmin": -8.0, - "cnum": 10, - "include_y_labels": false, - "include_colorbar": true, - "divide_train_test_axis": "gpus", - "divide_train_test_after_num": 3, - "annotate": true, - "print_mean_of_columns": true, - "print_mean_of_rows": false - }, - { - "scope": "aggregate", - "style": "line", - "ylabel": "Aggregate performance relative to baseline" - } - ], - "resolution": 1000.0, - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - } -} \ No newline at end of file diff --git a/experiment_files/niki_compare_kt.json b/experiment_files/niki_compare_kt.json deleted file mode 100644 index 0246a1e..0000000 --- a/experiment_files/niki_compare_kt.json +++ /dev/null @@ 
-1,224 +0,0 @@ -{ - "version": "1.2.0", - "name": "Compare Niki's LLMAEA algorithms", - "parent_folder": "/var/scratch/fjwillem/compare_niki", - "experimental_groups_defaults": { - "applications": [ - { - "name": "dedispersion_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "dedispersion_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "hotspot_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "hotspot_milo.json", - "objective_performance_keys": [ - "GFLOP/s" - ] - }, - { - "name": "convolution_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "convolution_milo.json", - "objective_performance_keys": [ - "time" - ] - }, - { - "name": "gemm_milo", - "folder": "../autotuning_methodology/benchmark_hub/kernels", - "input_file": "gemm_milo.json", - "objective_performance_keys": [ - "time" - ] - } - ], - "gpus": [ - "A100", - "A4000", - "MI250X", - "A6000", - "W6600", - "W7800" - ], - "pattern_for_full_search_space_filenames": { - "regex": "./benchmark_hub/cachefiles/${applications}/${gpus}_T4.json" - }, - "stochastic": true, - "repeats": 100, - "samples": 32, - "minimum_fraction_of_budget_valid": 0.1, - "minimum_number_of_valid_search_iterations": 10, - "ignore_cache": false - }, - "search_strategies": [ - { - "name": "llamea_alg-dedispersion-LLaMEA-o4-mini", - "search_method": "HybridVNDX", - "display_name": "LLaMEA o4 mini dedispersion", - "autotuner": "KernelTuner", - "custom_search_method_path": "../llamea_gen_algs/alg-dedispersion-LLaMEA-o4-mini.py", - "color_index": 1 - }, - { - "name": "llamea_alg-gemm-LLaMEA-o4-mini", - "search_method": "AdaptiveTabuGreyWolf", - "display_name": "LLaMEA o4 mini gemm", - "autotuner": "KernelTuner", - "custom_search_method_path": "../llamea_gen_algs/alg-gemm-LLaMEA-o4-mini.py", - "color_index": 2 - }, - { - "name": "pyatf_differential_evolution", - "search_method": "pyatf_strategies", - 
"search_method_hyperparameters": [ - { - "name": "searchtechnique", - "value": "differential_evolution" - }, - { - "name": "use_searchspace_cache", - "value": true - } - ], - "display_name": "pyATF Differential Evolution", - "autotuner": "KernelTuner", - "color_index": 7 - }, - { - "name": "genetic_algorithm_tuned_extensive_6", - "search_method": "genetic_algorithm", - "search_method_hyperparameters": [ - { - "name": "popsize", - "value": 26 - }, - { - "name": "maxiter", - "value": 150 - }, - { - "name": "method", - "value": "single_point" - }, - { - "name": "mutation_chance", - "value": 5 - } - ], - "display_name": "Kernel Tuner Genetic Algorithm", - "autotuner": "KernelTuner", - "color_index": 4 - }, - { - "name": "simulated_annealing_tuned_extensive", - "search_method": "simulated_annealing", - "search_method_hyperparameters": [ - { - "name": "T", - "value": 0.1 - }, - { - "name": "T_min", - "value": 0.0001 - }, - { - "name": "alpha", - "value": 0.9975 - }, - { - "name": "maxiter", - "value": 1 - } - ], - "display_name": "Kernel Tuner Simulated Annealing", - "autotuner": "KernelTuner", - "color_index": 5 - } - ], - "statistics_settings": { - "cutoff_percentile": 0.95, - "cutoff_percentile_start": 0.01, - "cutoff_type": "time", - "objective_time_keys": [ - "all" - ] - }, - "visualization_settings": { - "plots": [ - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": [ - "fevals" - ], - "y_axis_value_types": [ - "normalized", - "baseline" - ] - }, - { - "scope": "searchspace", - "style": "line", - "x_axis_value_types": [ - "time" - ], - "y_axis_value_types": [ - "normalized", - "baseline" - ] - }, - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "applications" - ], - "y_axis_value_types": [ - "gpus" - ], - "cmin": -8.0, - "cnum": 10, - "include_y_labels": true, - "include_colorbar": false, - "divide_train_test_axis": "gpus", - "divide_train_test_after_num": 3, - "annotate": true, - "print_mean_of_columns": 
true, - "print_mean_of_rows": false - }, - { - "scope": "search_strategy", - "style": "heatmap", - "x_axis_value_types": [ - "applications" - ], - "y_axis_value_types": [ - "gpus" - ], - "cmin": -8.0, - "cnum": 10, - "include_y_labels": false, - "include_colorbar": true, - "divide_train_test_axis": "gpus", - "divide_train_test_after_num": 3, - "annotate": true, - "print_mean_of_columns": true, - "print_mean_of_rows": false - }, - { - "scope": "aggregate", - "style": "line", - "ylabel": "Aggregate performance relative to baseline" - } - ], - "resolution": 1000.0, - "confidence_level": 0.95, - "compare_baselines": false, - "compare_split_times": false - } -} \ No newline at end of file From 30e6950f720e368af0b9c183c8939965d4a27898 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Wed, 3 Sep 2025 11:11:07 +0200 Subject: [PATCH 230/234] Add note to readme that installing in a virtual environment or with pipx is recommended --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index dad96e6..64c8fe4 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ Currently, the stable releases of this software package are compatible with [Ker ## Installation The package can be installed with `pip install autotuning_methodology`. Alternatively, it can be installed by cloning this repository and running `pip install .` in the root of the cloned project. -Python >= 3.9 is supported. +Like most Python packages, installing in a virtual environment or with `pipx` is recommended. Python >= 3.10 is supported. ## Notable features - Official software by the authors of the methodology-defining paper. 
From f4edf3af74d2b3a7e0a1d940f685b0b516fd3fbc Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Wed, 3 Sep 2025 11:18:58 +0200 Subject: [PATCH 231/234] Dropped python3.9 support, ensured python 3.12 and 3.13 are tested throughout --- .github/workflows/build-test-python-package.yml | 2 +- .github/workflows/publish-package.yml | 4 ++-- docs/getting_started.rst | 2 +- mypy.ini | 2 +- noxfile.py | 2 +- pyproject.toml | 3 +-- 6 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build-test-python-package.yml b/.github/workflows/build-test-python-package.yml index 51a77ac..aae2424 100644 --- a/.github/workflows/build-test-python-package.yml +++ b/.github/workflows/build-test-python-package.yml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/publish-package.yml b/.github/workflows/publish-package.yml index c513a5c..f95b353 100644 --- a/.github/workflows/publish-package.yml +++ b/.github/workflows/publish-package.yml @@ -23,10 +23,10 @@ jobs: steps: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - uses: actions/checkout@v3 - - name: Set up Python 3.9 + - name: Set up Python 3.12 uses: actions/setup-python@v4 with: - python-version: 3.9 + python-version: 3.12 cache: pip # Publishes to PyPi diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 6949605..e55b60c 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -3,7 +3,7 @@ Getting Started Start out by installing the package. The simplest way to do this is ``pip install autotuning_methodology``. -Python 3.9 and up are supported. +Python 3.10 and up are supported. 
Defining an experiment ^^^^^^^^^^^^^^^^^^^^^^ diff --git a/mypy.ini b/mypy.ini index ad6ba41..b9b5049 100755 --- a/mypy.ini +++ b/mypy.ini @@ -1,6 +1,6 @@ # Global options: [mypy] -python_version=3.9 +python_version=3.12 [mypy-isotonic.isotonic.*] ignore_missing_imports = True diff --git a/noxfile.py b/noxfile.py index dc6821a..e3a19ad 100644 --- a/noxfile.py +++ b/noxfile.py @@ -21,7 +21,7 @@ def lint(session: nox.Session) -> None: # @nox.session # uncomment this line to only run on the current python interpreter @nox.session( - python=["3.9", "3.10", "3.11", "3.12", "3.13"] + python=["3.10", "3.11", "3.12", "3.13"] ) # missing versions can be installed with `pyenv install ...` # do not forget check / set the versions with `pyenv global`, or `pyenv local` in case of virtual environment def tests(session: nox.Session) -> None: diff --git a/pyproject.toml b/pyproject.toml index bec9576..9503feb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ requires = ["flit_core >=3.8.0,<4"] [project] # https://packaging.python.org/en/latest/specifications/declaring-project-metadata/#declaring-project-metadata name = "autotuning_methodology" -version = "1.2.0" +version = "1.1.0" authors = [{ name = "Floris-Jan Willemsen", email = "fjwillemsen97@gmail.com" }] description = "Software package easing implementation of the guidelines of the 2024 paper 'A Methodology for Comparing Auto-Tuning Optimization Algorithms' (https://doi.org/10.1016/j.future.2024.05.021). The DOI of this software is https://doi.org/10.5281/zenodo.11243974." 
keywords = ["autotuning", "auto-tuning", "methodology", "scientific"] @@ -16,7 +16,6 @@ classifiers = [ "License :: OSI Approved :: MIT License", "Natural Language :: English", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", From 5e7b92c390afdcf9452b372404cf5a4b28aa9e12 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Wed, 3 Sep 2025 11:23:02 +0200 Subject: [PATCH 232/234] Various improvements to linting --- src/autotuning_methodology/runner.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/autotuning_methodology/runner.py b/src/autotuning_methodology/runner.py index 7cecd2e..6035a97 100755 --- a/src/autotuning_methodology/runner.py +++ b/src/autotuning_methodology/runner.py @@ -8,7 +8,10 @@ import time as python_time import warnings from pathlib import Path -import pickle, gzip # compression libraries if necessary for collecting results + +# compression libraries if necessary for collecting results +import pickle +import gzip import numpy as np import progressbar @@ -227,13 +230,15 @@ def collect_results( results_description: the ``ResultsDescription`` object to write the results to. searchspace_stats: the ``SearchspaceStatistics`` object, used for conversion of imported runs. profiling: whether profiling statistics must be collected. + compress: whether the results should be compressed. Returns: The ``ResultsDescription`` object with the results. 
""" if profiling: - import psutil, os - process = psutil.Process(os.getpid()) + import psutil + from os import getpid + process = psutil.Process(getpid()) warnings.warn(f"Memory usage at start of collect_results: {process.memory_info().rss / 1e6:.1f} MB") # calculate the minimum number of function evaluations that must be valid From 3b60e4f3d5e6adffb12fc017245d15e07186b27c Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Wed, 3 Sep 2025 20:10:02 +0200 Subject: [PATCH 233/234] Updated benchmark hub submodule --- benchmark_hub | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark_hub b/benchmark_hub index ca104e9..ff76e2c 160000 --- a/benchmark_hub +++ b/benchmark_hub @@ -1 +1 @@ -Subproject commit ca104e907d22c7a78067c234fb933da731d18a90 +Subproject commit ff76e2c86a7d9b3f389038589660e8b6ef4b4a5e From 0a836047f69762042f3f0304781e0b792d95c244 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Wed, 3 Sep 2025 20:10:50 +0200 Subject: [PATCH 234/234] Updated Kernel Tuner dependency version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9503feb..cbd86f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ dependencies = [ "progressbar2 >= 4.2.0", "jsonschema >= 4.17.3", "nonconformist >= 2.1.0", - "kernel_tuner >= 1.2", + "kernel_tuner >= 1.3.0", ] [project.optional-dependencies]