diff --git a/hyperoptimization/__init__.py b/hyperoptimization/__init__.py new file mode 100644 index 0000000..4eca69c --- /dev/null +++ b/hyperoptimization/__init__.py @@ -0,0 +1,6 @@ +from .neural_models import FCDeConvNetSigOut, FCDeConvNet +from .workers import TVAEWorker +from .explore import print_best +from .runs import local_sequential + +__all__ = ["FCDeConvNet", "FCDeConvNetSigOut", "TVAEWorker", "print_best", "local_sequential"] diff --git a/hyperoptimization/explore.py b/hyperoptimization/explore.py new file mode 100644 index 0000000..958aa16 --- /dev/null +++ b/hyperoptimization/explore.py @@ -0,0 +1,197 @@ +import hpbandster.core.result as hpres +import hpbandster.visualization as hpvis +import json +import matplotlib.pyplot as plt +from collections import Counter +import numpy as np +import os + + +class ValidFreeEnergy: + """ + Extracts the free energy from an hpbandster run if the run was successful. + If the free energy of the loss is requested (i.e. training), the negative loss is returned. + """ + + def __init__(self, key): + """ + :param key: The key of the free energy to extract. 
def print_best(path="", printable="loss", criterion="loss", show_config=True, top_n=10):
    """
    Print the best runs of a logged hpbandster experiment, best run first.

    :param path: directory of the results and config json files
    :param printable: value to print. "loss" is read from the run itself and
        sign-flipped; any other key is read from the run's "info" dictionary.
    :param criterion: value to sort by for the top N selection
    :param show_config: Bool prints model config
    :param top_n: maximum number of models to show; fewer are shown if fewer
        valid runs exist
    :return: None
    """

    result, all_runs = result_and_runs(path)
    id2conf = result.get_id2config_mapping()

    # sorted_by_value sorts ascending; reverse once so index 0 is the best run
    best_first = sorted_by_value(all_runs, key=criterion)[::-1]

    print("Good confs as judged by {}: ".format(criterion))
    # slicing (instead of range(top_n)) avoids an IndexError when there are
    # fewer valid runs than requested
    for rank, run in enumerate(best_first[:top_n], start=1):

        # get value: the source depends on WHAT is printed, not on the sort key
        if printable == "loss":
            value = -run[printable]  # fix minus from Hpbandster minimization
        else:
            value = run["info"][printable]

        # get config id
        config_id = "".join([str(part).rjust(3, " ") for part in run["config_id"]])

        # print result
        print(
            "{}. with {}/free energy= {} |id ({})".format(
                str(rank).rjust(2), printable, str(round(value, 6)).ljust(12), config_id
            )
        )

        if show_config:
            # use the same run as the printed value/id (the ascending list was
            # previously indexed here, which printed the config of a bad run)
            config = id2conf[run["config_id"]]
            print(json.dumps(config, indent=4))
if __name__ == "__main__":
    # Results directory is resolved relative to this file; swap in 'results'
    # to inspect a local run instead.
    path = os.path.abspath(
        os.path.join(os.path.abspath(__file__), "../../dynamically_binarized_mnist/results_2")
    )

    # First report which hyperparameters show up in interrupted runs.
    print_error_configs(path)

    # (printable, criterion, show_config) for each top-5 table, in print order.
    reports = (
        ("validation accuracy", "train accuracy", False),
        ("validation accuracy", "validation accuracy", True),
        ("test accuracy", "test accuracy", False),
    )
    for shown, sort_key, with_config in reports:
        print("\n")
        print_best(
            path,
            printable=shown,
            criterion=sort_key,
            show_config=with_config,
            top_n=5,
        )

    visualize(path)
def parse_net_shape(net_shape: str) -> Tuple[int, ...]:
    """
    Turn a TVAE shape string into a tuple of layer sizes.

    :param net_shape: colon-separated list of integers, e.g. `"10:10:2"`
    :returns: a tuple with the shape as integers, e.g. `(10,10,2)`
    """
    return tuple(int(layer_size) for layer_size in net_shape.split(":"))
Both options are set in blocks of + [(fully connected/deconv) layer,(dropout/batchnorm) regularizer, nonlinearity]. + # Todo make fc base completely optional? + + :param n_deconv_layers: number of transposed convolutions to be applied to the + embedding (after fc layers) + :param n_fc_layers: number of fully connected layers to be applied to S + :param W_shapes: weight shapes of the fully connected layers. Currenly one shape + is used for all layers. + :param fc_activations: set of activations for the fully connected layers. + :param dc_activations: set of activations for the deconv layers. + :param n_kernels: number of filters per deconv layer + :param dropouts: List of dropout booleans. Only applied to fc layers. + :param batch_norms: List of batch norm booleans. Only applied to deconv blocks. + :param output_shape: X.shape[-1] + :param input_size: S.shape + :param dropout_rate: global dropout rate # todo enable local dropout? + :param filters_from_fc: the amount of filters to use for the hidden representation + of the linear stack + :param sanity_checks: BOOL, decides whether sanity checks are run at init. + """ + super().__init__() + + if sanity_checks: + fc_sanitize = (W_shapes, n_fc_layers, dropouts) + dc_sanitize = (n_deconv_layers, n_kernels, batch_norms, filters_from_fc) + self.test_sanity(input_size, fc_sanitize, dc_sanitize) + + self.shape = [input_size] + + self.n_deconv_layers = n_deconv_layers + + if n_kernels and n_kernels[-1] != 1: + warnings.warn( + "Final number of kernels exceeds expected dimensionality. Setting manually to 1." 
+ ) + n_kernels[-1] = 1 + + if n_fc_layers: + if not n_deconv_layers: + warnings.warn( + "Using fully connected network, output layer set to to input shape manually" + ) + W_shapes[-1] = output_shape + self.fc_stack = FCnet( + input_size, W_shapes, fc_activations, dropouts, dropout_rate, dtype=to.double + ) + + else: + self.fc_stack = nn.Sequential() + + if n_deconv_layers: + if n_fc_layers: + input_size = W_shapes[-1] # plug on top of fc + self.deconv_stack = Deconvnet( + input_size, + filters_from_fc, + kernels, + output_shape, + n_deconv_layers, + n_kernels, + dc_activations, + paddings, + batch_norms, + dtype=to.double, + ) + self.deconv_stack.output_shape = output_shape + + else: + self.deconv_stack = nn.Sequential() + + def forward(self, x): + x = x.double() + h = self.fc_stack(x) + out = self.deconv_stack(h) + return out + + def number_of_parameters(self): + return sum(p.numel() for p in self.parameters() if p.requires_grad) + + @staticmethod # static to make mypy happy + def test_sanity(input_size, fc_sanitize, dc_sanitize): + + # make sanity checks + # Todo: decide whether and which sanity checks can be removed + W_shapes, n_fc_layers, dropouts = fc_sanitize + n_deconv_layers, n_kernels, batch_norms, filters_from_fc = dc_sanitize + + # fc sanity + assert ( + len(W_shapes) == n_fc_layers == len(dropouts) + ), "add information for all fc layers (dropout can be 0)" + + # dc sanity + assert ( + len(n_kernels) == n_deconv_layers == len(batch_norms) + ), "add information for all deconv layers" + + # fc+dc sanity + if n_kernels and n_fc_layers: + initial_deconv_dim = int(np.sqrt(W_shapes[-1] / n_kernels[0])) + assert n_kernels[0] == W_shapes[-1] / initial_deconv_dim**2, ( + "the output of the final fully connected layer should be " + "a product of squares, where the product is the number " + "of filters and the square is the shape of the filters. 
class Deconvnet(to.nn.Module):
    """
    Stack of transposed-convolution blocks
    [ConvTranspose2d, optional BatchNorm2d, activation] that maps a flat,
    square-reshapeable embedding to a flat output of size ``output_shape``.
    """

    def __init__(
        self,
        in_features,
        filters_from_fc,
        kernels,
        output_shape,
        n_deconv_layers,
        n_kernels,
        dc_activations,
        paddings=None,
        batch_norms=None,
        dtype=to.double,
    ):
        """
        :param in_features: size of the flat input; it is reshaped to a square
            of side int(sqrt(in_features)) in `forward`
        :param filters_from_fc: number of input channels of the first layer
        :param kernels: dimensionality of kernels, e.g. =[3] results in kernels of 3x3.
            If empty/None, kernels and paddings are derived from the required upsampling.
        :param output_shape: flat size of the output; must equal the product of the
            final feature-map shape
        :param n_deconv_layers: number of layers used when kernels are derived automatically
        :param n_kernels: number of kernels per layer. e.g. =2 results in 2 3x3 kernels
        :param dc_activations: one activation per layer, given as strings that are
            `eval`-ed (e.g. "to.nn.Tanh"); must come from trusted configuration only
        :param paddings: padding per layer; defaults to 0 for every layer
        :param batch_norms: truthy entries add a BatchNorm2d after the layer;
            defaults to no batch norm
        :param dtype: dtype of the layer parameters
        """

        super().__init__()

        if not hasattr(self, "shape"):
            self.shape = [in_features]

        # guard on the attribute that is actually created here
        if not hasattr(self, "deconv_stack"):
            self.deconv_stack = nn.Sequential()

        # forward() needs the flat output size, so store it on the instance
        # instead of relying on the caller to patch it in afterwards
        self.output_shape = output_shape

        # transposed convolution blocks
        input_len = int(np.sqrt(in_features))
        input_shape = (input_len, input_len, filters_from_fc)

        if not kernels:
            # calculate total increase in dimensionality
            total_upsampling = int(np.sqrt(output_shape) - np.sqrt(input_shape[0] * input_shape[1]))
            assert total_upsampling == np.sqrt(output_shape) - np.sqrt(
                input_shape[0] * input_shape[1]
            )

            if total_upsampling < 0:
                warn("Transposed convolution used for downsampling")

            # calculate kernel sizes and paddings, such as the outputs match the
            # dimensionality of the output
            kernels, paddings = self.deconvolution_hypers_from_upsampling(
                upsampling=total_upsampling, min_kernel=3, n_layers=n_deconv_layers
            )

        if not paddings:
            paddings = [0] * len(kernels)

        if not batch_norms:
            batch_norms = [0] * len(kernels)

        # add the transposed convolution blocks; zip stops at the shortest list
        hypers = zip(batch_norms, n_kernels, kernels, paddings, dc_activations)
        for i, (batch_norm, n_kernels_, kernel_size, padding, activation) in enumerate(hypers):

            self.shape.append(n_kernels_ * kernel_size**2)  # n params coming from the filters
            self.deconv_stack.add_module(
                "conv_transpose_{}".format(i),
                nn.ConvTranspose2d(
                    in_channels=input_shape[-1],
                    out_channels=n_kernels_,
                    kernel_size=kernel_size,
                    padding=padding,
                    dtype=dtype,
                ),
            )

            if batch_norm:
                # same dtype as the convolution so the block does not mix precisions
                self.deconv_stack.add_module(
                    "batch_norm_{}".format(i), nn.BatchNorm2d(n_kernels_, dtype=dtype)
                )

            # NOTE: eval of a configuration string; never feed untrusted input here
            self.deconv_stack.add_module("deconv_activation_{}".format(i), eval(activation)())

            input_shape = self.deconv_output_shape(
                input_len=input_shape[0],
                filters=n_kernels_,
                kernel=kernel_size,
                padding=padding,
            )

        assert input_shape[0] == input_shape[1]
        assert output_shape == np.prod(
            input_shape
        ), "output ({}) not equal to product of input ({})".format(output_shape, input_shape)

        # todo: change self.shape functionality appropriately after the TVAE changes
        self.shape.append(output_shape)

    def forward(self, x):
        """
        Apply the deconvolution stack independently to every slice along dim 1.

        :param x: tensor indexed as (n, S_kn, features); the last dimension is
            reshaped to a square image with a single channel
        :return: tensor of shape (n, S_kn, output_shape)
        """
        n, S_kn, D = x.shape[0], x.shape[1], self.output_shape
        side = int(np.sqrt(x.shape[-1]))
        h = x.reshape(n, S_kn, side, side)
        out = to.empty(size=(n, S_kn, D), device=h.device, dtype=h.dtype)
        for s in range(S_kn):
            # add the channel dimension expected by ConvTranspose2d
            h_s = self.deconv_stack(h[:, s, :, :].unsqueeze(1))
            # todo force last filter to match the dimensionality?
            out[:, s, :] = to.reshape(h_s, (n, D))

        return out

    @staticmethod
    def deconvolution_hypers_from_upsampling(upsampling: int, min_kernel=3, n_layers=1):
        """
        :param upsampling: dimensionality needed to upsample image
        :param min_kernel: minimum kernel size
        :param n_layers: number of layers on which to spread the upsampling on
        :return: the kernel size and padding for each layer such that the upsampling is obeyed.
            Two empty lists are returned if n_layers <= 0.
        """

        # todo: decide how to treat bad n_layers input
        if n_layers <= 0:
            return [], []

        if upsampling == 0:
            return [3] * n_layers, [1] * n_layers

        # each layer gets the same amount of upsampling
        layer_upsampling = upsampling // n_layers

        # any remaining upsampling goes to the final layer to reduce compute
        last_layer = layer_upsampling + upsampling % n_layers

        assert layer_upsampling * n_layers + upsampling % n_layers == upsampling

        # assign upsampling to layers
        layers_upsampling = [layer_upsampling for _ in range(n_layers)]
        layers_upsampling[-1] = last_layer
        kernels, paddings = [], []

        # compute kernel/padding combination for each upsampling value
        for upsampling_ in layers_upsampling:
            # padding adds 2
            # kernel adds -1, with kernel=1 -> upsampling=0
            kernel = min_kernel
            padding = (min_kernel - upsampling_ - 1) / 2

            # padding uneven: move the half step into the kernel size
            if padding != int(padding):
                kernel -= 1 * np.sign(padding)
                padding = int(padding)

            # negative padding is not allowed: grow the kernel instead
            if padding < 0:
                kernel += 2 * abs(padding)
                padding = 0

            assert (
                -2 * padding + (kernel - 1) == upsampling_
            ), "Logical error in upsampling: padding = {}, kernel = {}, upsamplings = {}".format(
                padding, kernel, layers_upsampling
            )
            # todo decide whether to allow negative padding
            assert padding >= 0, "upsampling {} with negative padding={}".format(
                upsampling_, padding
            )
            kernels.append(int(kernel))
            paddings.append(int(padding))

        actual = np.sum(np.array(kernels) - 1) - 2 * np.sum(np.array(paddings))
        diff = actual - upsampling

        assert (
            diff == 0
        ), "Unexpected diff={} between expected result ({})" "and actual ({})".format(
            diff, upsampling, actual
        )
        return kernels, paddings

    def deconv_output_shape(
        self,
        input_len,
        filters,
        kernel,
        stride=1,
        padding=0,
        dilation=1,
        output_padding=0,
    ):
        """
        returns the output shape of a transposed convolutional layer
        :param input_len: length of input: for 3x28x28 coloured MNIST it's 28
        :param filters: number of filters
        :param kernel: 1D length of kernel: for a 3x3 kernel it's a 3
        :param stride: stride of the filter
        :param padding: 1D size of padding around the input.
        :param dilation: spacing between kernel elements
        :param output_padding: extra size added to one side of the output
        :return: the output shape of a transposed convolutional layer as (out, out, filters)
        """
        assert type(input_len + filters + stride + kernel + dilation + output_padding) is int
        out = int(
            stride * (input_len - 1) - 2 * padding + dilation * (kernel - 1) + output_padding + 1
        )
        return (out, out, filters)

    def conv_output_shape(self, input_len, filters, kernel, stride=1, padding=0, dilation=1):
        """
        returns the output shape of a convolutional layer
        :param input_len: length of input: for 3x28x28 coloured MNIST it's 28
        :param filters: number of filters
        :param kernel: 1D length of kernel: for a 3x3 kernel it's a 3
        :param stride: stride of the filter
        :param padding: 1D size of padding around the input.
        :param dilation: spacing between kernel elements
        :return: the output shape of a convolutional layer as (out, out, filters)
        """
        assert type(input_len + filters + stride + kernel) is int
        out = (input_len + 2 * padding - dilation * (kernel - 1) - 1) / stride + 1
        out = int(out)
        return (out, out, filters)
def feature_map(w, h, d, n_kernels, kernel_size):
    """
    Compute the feature map of a convolutional layer.

    :param w: input width
    :param h: input height
    :param d: input depth (number of input channels)
    :param n_kernels: number of kernels of the layer
    :param kernel_size: 1D length of the (square) kernel
    :return: tuple (volume, n_weights), where volume is the output
        (width, height, depth) and n_weights the number of kernel weights
    """
    out_volume = (w - kernel_size + 1, h - kernel_size + 1, n_kernels)
    weight_count = kernel_size**2 * d * n_kernels
    return out_volume, weight_count
def parse_net_shape(net_shape: str) -> Tuple[int, ...]:
    """
    Parse the TVAE shape given on the command line.

    :param net_shape: colon-separated list of integers, e.g. `"10:10:2"`
    :returns: a tuple with the shape as integers, e.g. `(10,10,2)`
    """
    layer_sizes = [int(token) for token in net_shape.split(":")]
    return tuple(layer_sizes)
def local_sequential(worker, parsed_args, previous_run=None, *args, **kwargs):
    """
    Run a BOHB optimization with a nameserver and a single background worker
    started from this process, and pickle the result to
    `<shared_directory>/results.pkl`.

    :param worker: worker class; instantiated with the connection settings,
        `parsed_args` and any extra `*args`/`**kwargs`
    :param parsed_args: parsed command line arguments; reads nic_name, run_id,
        shared_directory, min_budget, max_budget and n_iterations
    :param previous_run: passed to BOHB as `previous_result`
    :return: None
    """

    # get hostname
    host = hpns.nic_name_to_host(parsed_args.nic_name)

    # log results
    result_logger = hpres.json_result_logger(directory=parsed_args.shared_directory, overwrite=True)

    # Start a nameserver:
    NS = hpns.NameServer(
        run_id=parsed_args.run_id,
        host=host,
        port=None,
        working_directory=parsed_args.shared_directory,
    )
    ns_host, ns_port = NS.start()

    # From here on the nameserver is running: make sure it is always shut
    # down, even if the worker, the optimizer or the pickling fails.
    try:
        # Start local worker
        w = worker(
            *args,
            run_id=parsed_args.run_id,
            host=host,
            nameserver=ns_host,
            nameserver_port=ns_port,
            timeout=120,
            parsed_args=parsed_args,
            **kwargs
        )
        w.run(background=True)

        # Run an optimizer
        bohb = BOHB(
            configspace=w.get_configspace(),
            run_id=parsed_args.run_id,
            host=host,
            nameserver=ns_host,
            nameserver_port=ns_port,
            result_logger=result_logger,
            min_budget=parsed_args.min_budget,
            max_budget=parsed_args.max_budget,
            previous_result=previous_run,
        )
        try:
            res = bohb.run(n_iterations=parsed_args.n_iterations)

            # store results
            with open(os.path.join(parsed_args.shared_directory, "results.pkl"), "wb") as fh:
                pickle.dump(res, fh)
        finally:
            # shutdown the optimizer together with its workers
            bohb.shutdown(shutdown_workers=True)
    finally:
        NS.shutdown()
class AutoEncoder(object):
    """
    Minimal fully connected autoencoder with four bias-free linear layers,
    trained on the MNIST digits 0 and 1 from the module-level `train_loader`.
    """

    def __init__(self, shape=None, lr=0.003):
        """
        :param shape: five layer sizes (input, hidden, code, hidden, output);
            defaults to (784, 256, 64, 256, 784)
        :param lr: learning rate handed to the module-level `optimizer`
        """
        if shape is None:
            shape = (28**2, 256, 64, 256, 28**2)

        # define weight initialization
        w_init = to.nn.init.xavier_normal_

        # initialize linear layers; double precision because `train` feeds
        # double batches (float32 weights made the matmul fail on dtype mismatch)
        self.W0 = w_init(to.empty(shape[0], shape[1], dtype=to.double, requires_grad=True))
        self.W1 = w_init(to.empty(shape[1], shape[2], dtype=to.double, requires_grad=True))
        self.W2 = w_init(to.empty(shape[2], shape[3], dtype=to.double, requires_grad=True))
        self.W3 = w_init(to.empty(shape[3], shape[4], dtype=to.double, requires_grad=True))

        # load layers to optimizer
        self.optimizer = optimizer([self.W0, self.W1, self.W2, self.W3], lr=lr)

    def forward(self, x):
        """
        :param x: batch of flattened inputs, one row per datapoint
        :return: reconstruction with values in (-1, 1) (tanh output)
        """
        # define activation function
        f = to.nn.functional.leaky_relu

        # accept inputs of any floating dtype (e.g. raw float32 test batches)
        x = x.to(self.W0.dtype)

        # forward pass
        h0 = f(x @ self.W0)
        h1 = f(h0 @ self.W1)
        h2 = f(h1 @ self.W2)
        rec = to.tanh(h2 @ self.W3)

        return rec

    def train(self, epochs):
        """
        Train on the digits 0 and 1 of `train_loader`.

        :param epochs: number of passes over the training data
        :return: list with the per-datapoint average training loss of every epoch
        """

        losses = []

        for n_epoch in range(epochs):

            loss_sum = 0.0
            no_datapoints = 0

            for batch, target in train_loader:

                # get 0s and 1s
                batch_ = (
                    batch[to.logical_or(target == 0, target == 1)].flatten(start_dim=1).double()
                )
                if batch_.shape[0] == 0:
                    continue

                # train
                self.optimizer.zero_grad()
                # reshape_as replaces the deprecated, non-checking resize_as
                reconstruction = self.forward(batch_).reshape_as(batch_)
                loss = loss_fn(reconstruction, batch_)
                loss.backward()
                self.optimizer.step()

                # log: weight each batch loss by its size so that the epoch
                # value really is an average over datapoints
                loss_sum += loss.item() * len(batch_)
                no_datapoints += len(batch_)

            avg_train_loss = loss_sum / no_datapoints if no_datapoints else 0.0

            print(
                "Epoch {} finished with average loss of {}".format(
                    n_epoch, round(avg_train_loss, 6)
                )
            )
            losses.append(avg_train_loss)

        print("Training is complete.")
        return losses
class ConvDeconv(AutoEncoder):
    """
    Autoencoder with a convolutional encoder and a `Deconvnet` decoder for
    28x28 single-channel images.
    """

    def __init__(self, shape=None, dtype=to.double, lr=0.003):
        """
        :param shape: layer sizes; only the middle entry (code size) is used by
            this model. Defaults to (784, 256, 64, 256, 784).
        :param dtype: dtype of the encoder layers
        :param lr: learning rate of the optimizer
        """
        # resolve the default here as well: the parent only resolves it locally,
        # so len(shape) below used to fail for shape=None
        if shape is None:
            shape = (28**2, 256, 64, 256, 28**2)
        super().__init__(shape, lr=lr)
        n_stacks = len(shape) - 1
        half_stacks = n_stacks // 2
        n_deconv_layers = 2

        self.encoder = to.nn.Sequential(
            to.nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, dtype=dtype),
            to.nn.Flatten(),
            to.nn.LeakyReLU(),
            to.nn.LazyLinear(out_features=shape[half_stacks], dtype=dtype),
            to.nn.LeakyReLU(),
        )

        self.decoder = Deconvnet(
            in_features=shape[half_stacks],
            filters_from_fc=1,
            kernels=None,
            output_shape=28**2,
            n_deconv_layers=n_deconv_layers,
            n_kernels=[5, 1],
            batch_norms=None,
            # one activation per deconv layer, independent of len(shape)
            dc_activations=["to.nn.Tanh"] * n_deconv_layers,
        )
        # Deconvnet.forward reads `output_shape`; `D` alone is never read there
        self.decoder.output_shape = 28**2
        self.decoder.D = 28**2  # kept for backward compatibility
        self.decoder.double()

        parameters = chain(self.encoder.parameters(), self.decoder.parameters())
        # honour the requested learning rate (it used to be fixed to 0.003)
        self.optimizer = optimizer(parameters, lr=lr)

    def forward(self, x):
        """
        :param x: batch of images with 784 values per datapoint
        :return: decoder output of shape (batch, 1, 784)
        """
        z = self.encoder(x.double().reshape(x.shape[0], 1, 28, 28))
        # set S_K(n) to 1 because we are reusing Deconvnet
        z = z.reshape(z.shape[0], 1, z.shape[-1])
        x_hat = self.decoder(z)

        return x_hat
def parse_hyperopt_args(parser):
    """
    Register the hpbandster command line options on an argparse parser.

    :param parser: an Argument Parser object from argparse
    :return: the same parser object, not yet parsed, extended with the hpbandster arguments
    """

    # (flag, keyword arguments for ``add_argument``), in registration order
    option_table = (
        (
            "--min_budget",
            {"type": float, "default": 1, "help": "Minimum number of epochs for training."},
        ),
        (
            "--max_budget",
            {"type": float, "default": 5, "help": "Maximum number of epochs for training."},
        ),
        (
            "--n_iterations",
            {
                "type": int,
                "default": 16,
                "help": "Number of iterations performed by the optimizer",
            },
        ),
        (
            "--worker",
            {
                "action": "store_true",
                "default": False,
                "help": "Flag to turn this into a worker process",
            },
        ),
        (
            "--run_id",
            {
                "type": str,
                "default": "derp",
                "help": "A unique run id for this optimization run. An easy option is "
                "to use the job id of the clusters scheduler.",
            },
        ),
        (
            "--nic_name",
            {
                "type": str,
                "default": "lo",
                "help": "Which network interface to use for communication.",
            },
        ),
        (
            "--shared_directory",
            {
                "type": str,
                "default": ".",
                "help": "A directory that is accessible for all processes, e.g. a NFS share.",
            },
        ),
    )

    for flag, options in option_table:
        parser.add_argument(flag, **options)

    return parser
It is expected to + contain the following: + - Ksize: number of states to be kept for truncated inference + - dataset: name of the dataset to be used + - epochs per half cycle: number of epochs until a half cycle of cyclic learning + rate is completed + - batch size: number of samples per batch + - output: name of the output file + - min_lr: minimum learning rate for the cyclic learning rate scheduler + - max_lr: maximum learning rate for the cyclic learning rate scheduler + - net_shape: shape of the network. If no H argument is passed, the final layer + is used to infer the H size. + - H: size of the first generative layer. This option should be used, as net_shape + will be phased out. + - cyclic_lr: whether to use cyclic learning rate or not. If False, the learning + rate will be constant. + :param kwargs: Additional arguments to be passed to the underlying Worker class. + """ + + # call base class constructor + super().__init__(**kwargs) + + # extract args + self.S = S = parsed_args.Ksize + self.data_fname = parsed_args.dataset + self.epochs_per_half_cycle = 1 + self.batch_size = parsed_args.batch_size + self.output = parsed_args.output + self.min_lr = parsed_args.min_lr + self.max_lr = parsed_args.max_lr + + # infer size of H from net_shape. 
def compute(self, config, budget, working_directory, *args, **kwargs):
    """
    Train and evaluate one TVAE configuration and report it in hpbandster format.

    :param config: a config in hpbandster style that contains the model hyperparameters.
                   Read here: "optimizer", "lr" and, for SGD, "sgd_momentum"; the remaining
                   entries are consumed by the helper methods called below.
    :param budget: amount of epochs to run the model (passed to training as ``int(budget)``)
    :param working_directory: arg used by hpbandster (unused here)
    :param args: other args (unused here)
    :param kwargs: other keyworded args (unused here)
    :return: dict with the key "loss" (negative training free energy, NaN if none was
             reported) and the key "info" holding additional run information
    :raises NotImplementedError: if ``config["optimizer"]`` is neither "SGD" nor "Adam"
    """

    # extract S and H if they are hyperparameters
    self.extract_hypers_from_config(config)

    # define the model and move it to its device *before* the optimizer is built, as the
    # torch.optim documentation recommends
    device = tvo.get_device()
    model = self.get_external_model(config)
    model.to(device)
    model.device = device

    # setup optimizer
    if config["optimizer"] == "SGD":
        optimizer = to.optim.SGD(
            model.parameters(),
            lr=config["lr"],
            momentum=config["sgd_momentum"],
        )
    elif config["optimizer"] == "Adam":
        optimizer = to.optim.Adam(model.parameters(), lr=config["lr"])
    else:
        raise NotImplementedError("Currently we support only SGD with momentum and Adam")

    # setup TVAE
    cycliclr_half_step_size = np.ceil(self.N / self.batch_size) * self.epochs_per_half_cycle

    if not self.cyclic_lr:
        # constant learning rate: collapse the cyclic range onto the sampled value
        self.min_lr = self.max_lr = config["lr"]

    model = TVAE(
        external_model=model,
        shape=None,
        min_lr=self.min_lr,
        max_lr=self.max_lr,
        cycliclr_step_size_up=cycliclr_half_step_size,
        optimizer=optimizer,
        precision=to.double,
    )

    exp_conf = ExpConfig(
        batch_size=self.batch_size,
        output=self.output,
        data_transform=self.data_transform,
    )
    estep_conf = self.get_EEM_conf(config)

    # setup training
    data_fname = self.data_fname

    training = Training(exp_conf, estep_conf, model, data_fname, self.valid_fname)
    testing = Testing(exp_conf, estep_conf, model, data_fname)
    print("\nlearning...")
    training_results = []
    for train_log in training.run(int(budget)):
        train_log.print()
        training_results.append(train_log._results)

    testing_results = []
    for test_log in testing.run(1):
        testing_results.append(test_log._results)

    # only the free energies of the last logged epoch are reported
    train_F, _ = get(training_results[-1], "train_F", "train_subs")
    valid_F, _ = get(training_results[-1], "test_F", "test_subs")
    test_F, _ = get(testing_results[-1], "test_F", "test_subs")

    # HpBandSter always minimizes, so the negative free energy is the loss. Test for a
    # missing value explicitly: the previous truthiness test mapped a free energy of exactly
    # 0 to NaN and raised a TypeError on None.
    loss = np.nan if train_F is None else -train_F

    return {
        "loss": loss,
        "info": {
            "test accuracy": test_F,
            "train accuracy": train_F,
            "validation accuracy": valid_F,
            "number of parameters": model._external_model.number_of_parameters(),
        },
    }
@staticmethod
def add_EEM(cs, max_states=6):
    """
    Add the hyperparameters of the evolutionary E-step to a configuration space.

    Three integer hyperparameters are added: "S", "n_parents" and "n_children", each ranging
    from 1 to ``max_states``.

    The bounds of a ConfigSpace hyperparameter have to be plain numbers. The previous version
    passed hyperparameter objects as upper bounds (``upper=n_states`` and
    ``max(n_parents, 2)``), which cannot produce a valid search space. The relation
    "n_parents <= S" can therefore not be encoded in the bounds; code that consumes the
    sampled values has to clamp them if it relies on that relation.

    :param cs: the ConfigurationSpace to extend (modified in place)
    :param max_states: largest value that can be sampled for each of the three
                       hyperparameters; has to be at least 2 so that lower < upper
    :return: the ConfigurationSpace that was passed in
    :raises ValueError: if ``max_states`` is smaller than 2
    """
    if max_states < 2:
        raise ValueError("max_states has to be at least 2, got {}".format(max_states))

    n_states = CSH.UniformIntegerHyperparameter(name="S", lower=1, upper=max_states)
    n_parents = CSH.UniformIntegerHyperparameter(name="n_parents", lower=1, upper=max_states)
    n_children = CSH.UniformIntegerHyperparameter(name="n_children", lower=1, upper=max_states)
    cs.add_hyperparameters([n_states, n_parents, n_children])
    return cs
+ Beside float-hyperparameters on a log scale, it is also able to handle + categorical input parameter. + :return: ConfigurationsSpace-Object + """ + + lr = CSH.UniformFloatHyperparameter( + "lr", lower=1e-6, upper=1e-1, default_value="1e-2", log=True + ) + + # setup optimizers + optimizer = CSH.CategoricalHyperparameter("optimizer", ["Adam", "SGD"]) + sgd_momentum = CSH.UniformFloatHyperparameter( + "sgd_momentum", lower=0.0, upper=0.99, default_value=0.9, log=False + ) + cs.add_hyperparameters([lr, optimizer, sgd_momentum]) + + # The hyperparameter sgd_momentum will be used,if the configuration + # contains 'SGD' as optimizer. + cond = CS.EqualsCondition(sgd_momentum, optimizer, "SGD") + + cs.add_condition(cond) + + # set general block length + max_block_length = 5 # maximum block size for deconv or linear stack + + # define the linear blocks + num_linear_layers = CSH.UniformIntegerHyperparameter( + "num_linear_layers", + lower=1, + upper=max_block_length, + default_value=2, + log=False, + ) + root_W_shapes = CSH.UniformIntegerHyperparameter( + "root_W_shapes", lower=7, upper=14, default_value=8, log=False + ) # squared in the compute function as of August 2021 + + # define positions of dropout layers + has_dropout_1 = CSH.UniformIntegerHyperparameter( + "dropout_1", lower=0, upper=1, default_value=0, log=False + ) + has_dropout_2 = CSH.UniformIntegerHyperparameter( + "dropout_2", lower=0, upper=1, default_value=0, log=False + ) + has_dropout_3 = CSH.UniformIntegerHyperparameter( + "dropout_3", lower=0, upper=1, default_value=0, log=False + ) + has_dropout_4 = CSH.UniformIntegerHyperparameter( + "dropout_4", lower=0, upper=1, default_value=0, log=False + ) + has_dropout_5 = CSH.UniformIntegerHyperparameter( + "dropout_5", lower=0, upper=1, default_value=0, log=False + ) + + # define activations for the fully connected stack + activation_list = ["nn.Tanh", "nn.Sigmoid", "nn.LeakyReLU"] + activation_list = [activation_list[-1]] + fc_activation_1 = 
CSH.CategoricalHyperparameter("fc_activation_1", ["nn.Tanh"]) + fc_activation_2 = CSH.CategoricalHyperparameter("fc_activation_2", activation_list) + fc_activation_3 = CSH.CategoricalHyperparameter("fc_activation_3", activation_list) + fc_activation_4 = CSH.CategoricalHyperparameter("fc_activation_4", activation_list) + fc_activation_5 = CSH.CategoricalHyperparameter("fc_activation_5", activation_list) + + # define the deconv blocks + num_deconv_layers = CSH.UniformIntegerHyperparameter( + "num_deconv_layers", + lower=0, + upper=max_block_length, + default_value=2, + ) + + # define filter ranges + # Todo: take filter dimensionality from x for the final filter + # TODO: remove last filter from hyperparameters + + num_filters_1 = CSH.CategoricalHyperparameter("num_filters_1", [1]) + num_filters_2 = CSH.UniformIntegerHyperparameter( + "num_filters_2", lower=1, upper=4, default_value=4, log=True + ) + num_filters_3 = CSH.UniformIntegerHyperparameter( + "num_filters_3", lower=1, upper=4, default_value=4, log=True + ) + num_filters_4 = CSH.UniformIntegerHyperparameter( + "num_filters_4", lower=1, upper=4, default_value=4, log=True + ) + num_filters_5 = CSH.UniformIntegerHyperparameter( + "num_filters_5", lower=1, upper=4, default_value=4, log=True + ) + + # define existence of per-layer batch normalization + has_batch_norm_1 = CSH.UniformIntegerHyperparameter( + "batch_norm_1", lower=0, upper=1, default_value=0, log=False + ) + has_batch_norm_2 = CSH.UniformIntegerHyperparameter( + "batch_norm_2", lower=0, upper=1, default_value=0, log=False + ) + has_batch_norm_3 = CSH.UniformIntegerHyperparameter( + "batch_norm_3", lower=0, upper=1, default_value=0, log=False + ) + has_batch_norm_4 = CSH.UniformIntegerHyperparameter( + "batch_norm_4", lower=0, upper=1, default_value=0, log=False + ) + has_batch_norm_5 = CSH.UniformIntegerHyperparameter( + "batch_norm_5", lower=0, upper=1, default_value=0, log=False + ) + + # define activations for the deconv stack + activation_list = 
["nn.Tanh", "nn.Sigmoid", "nn.LeakyReLU"] + activation_list = [activation_list[-1]] + # raise ArithmeticError + dc_activation_1 = CSH.CategoricalHyperparameter("dc_activation_1", ["nn.Tanh"]) + dc_activation_2 = CSH.CategoricalHyperparameter("dc_activation_2", activation_list) + dc_activation_3 = CSH.CategoricalHyperparameter("dc_activation_3", activation_list) + dc_activation_4 = CSH.CategoricalHyperparameter("dc_activation_4", activation_list) + dc_activation_5 = CSH.CategoricalHyperparameter("dc_activation_5", activation_list) + + # add fc hyperparams + cs.add_hyperparameters( + [ + num_linear_layers, + root_W_shapes, + has_dropout_1, + has_dropout_2, + has_dropout_3, + has_dropout_4, + has_dropout_5, + fc_activation_1, + fc_activation_2, + fc_activation_3, + fc_activation_4, + fc_activation_5, + ] + ) + # add dc hyperparameters + cs.add_hyperparameters( + [ + num_deconv_layers, + num_filters_1, + num_filters_2, + num_filters_3, + num_filters_4, + num_filters_5, + has_batch_norm_1, + has_batch_norm_2, + has_batch_norm_3, + has_batch_norm_4, + has_batch_norm_5, + dc_activation_1, + dc_activation_2, + dc_activation_3, + dc_activation_4, + dc_activation_5, + ] + ) + + # Add conditions to hyperparameters. 
def model_args_from_(self, config, sanity_checks=False):
    """
    Translate an hpbandster config into the constructor arguments of the external model.

    As a side effect ``self.S`` / ``self.H`` are overwritten with ``config["S"]`` /
    ``config["H"]`` when the corresponding ``is_hyperparameter_*`` flag is set.

    :param config: mapping with the sampled hyperparameters. Required keys:
                   "num_deconv_layers", "num_linear_layers", "root_W_shapes", "dropout_rate"
                   and, for every active layer i (counted from 1), "fc_activation_i",
                   "dropout_i", "dc_activation_i" and "num_filters_i". "batch_norm_i" and
                   "num_kernels_i" are optional as a group (see below).
    :param sanity_checks: if True, assert that lengths and types of the extracted values are
                          consistent with the config
    :return: tuple (n_deconv_layers, n_fc_layers, W_shapes, fc_activations, dropouts,
             dc_activations, n_filters, batch_norms, dropout_rate, kernels)
    :raises TypeError: if the squared "root_W_shapes" is not an integer
    """
    import numbers  # only needed here; accepts Python and NumPy integers alike

    # unpack values from hpbandster config
    if self.is_hyperparameter_S:
        self.S = config["S"]
    if self.is_hyperparameter_H:
        self.H = config["H"]

    n_deconv_layers = config["num_deconv_layers"]
    n_fc_layers = config["num_linear_layers"]
    fc_layers = range(n_fc_layers)
    dc_layers = range(n_deconv_layers)

    # every fully connected layer gets the same width: the square of the sampled root
    width = config["root_W_shapes"] ** 2
    if not isinstance(width, numbers.Integral):
        # an explicit check instead of ``assert type(...) is int``: it also accepts NumPy
        # integers and is not stripped when Python runs with -O
        raise TypeError(
            "root_W_shapes must be an integer, got {}".format(type(width).__name__)
        )
    W_shapes = [int(width) for _ in fc_layers]

    fc_activations = [config["fc_activation_{}".format(i + 1)] for i in fc_layers]
    dropouts = [config["dropout_{}".format(i + 1)] for i in fc_layers]
    dc_activations = [config["dc_activation_{}".format(i + 1)] for i in dc_layers]
    n_filters = [config["num_filters_{}".format(i + 1)] for i in dc_layers]

    # batch norm flags default to 0 for every layer when the config has no "batch_norm_1"
    if "batch_norm_1" in config:
        batch_norms = [config["batch_norm_{}".format(i + 1)] for i in dc_layers]
    else:
        batch_norms = [0 for _ in dc_layers]

    dropout_rate = config["dropout_rate"]

    # kernels stay empty when the config has no "num_kernels_1"
    if "num_kernels_1" in config:
        kernels = [config["num_kernels_{}".format(i + 1)] for i in dc_layers]
    else:
        kernels = []

    if sanity_checks:
        # todo: use in testing

        def count_keys(fragment, exclude=None):
            """Number of config keys containing ``fragment`` but not ``exclude``."""
            return sum(
                1
                for key in config
                if fragment in key and (exclude is None or exclude not in key)
            )

        # check expected argument length
        assert len(fc_activations) == count_keys("fc_activation_")
        assert len(dropouts) == count_keys("dropout_", exclude="rate")
        assert len(dc_activations) == count_keys("dc_activation_")
        assert len(n_filters) == count_keys("num_filters_")
        assert len(batch_norms) == count_keys("batch_norm_")

        # check expected argument type
        assert isinstance(n_fc_layers, numbers.Integral)
        assert isinstance(n_deconv_layers, numbers.Integral)

        for a in fc_activations:
            assert type(a) is str
        for dr in dropouts:
            assert dr in [0, 1]
        for a in dc_activations:
            assert type(a) is str

        for f in n_filters:
            assert isinstance(f, numbers.Integral)

        for bn in batch_norms:
            assert bn in [0, 1]

        assert 1 >= dropout_rate >= 0

    return (
        n_deconv_layers,
        n_fc_layers,
        W_shapes,
        fc_activations,
        dropouts,
        dc_activations,
        n_filters,
        batch_norms,
        dropout_rate,
        kernels,
    )
def extract_hypers_from_config(self, config):
    """
    Copy S and H from the config onto the worker when they are hyperparameters.

    For each of "S" and "H": if the matching ``is_hyperparameter_*`` flag is set, the value
    is read from ``config`` and stored as the attribute of the same name.

    :param config: mapping with the sampled hyperparameters
    :raises KeyError: if a value flagged as hyperparameter is missing from ``config``
    """
    complaints = {
        "S": "Number of states is not a hyperparameter, and none was passed to the init",
        "H": "Number of initial hidden units is not a hyperparameter, and none "
        "was passed to the init",
    }

    for name in ("S", "H"):
        # flags are looked up one at a time, in the order S then H
        if not getattr(self, "is_hyperparameter_" + name):
            continue
        try:
            sampled = config[name]
        except KeyError:
            raise KeyError(complaints[name])
        setattr(self, name, sampled)