From 57be8c9f46dc7d999998ab4bea6336302e9ba4ff Mon Sep 17 00:00:00 2001 From: Faried Abu Zaid Date: Wed, 8 Oct 2025 15:08:19 +0200 Subject: [PATCH 1/2] extend ablations --- experiments/cifar/cifar_complementary.yaml | 114 +++++++++++++ .../fashion/fashion_complementary.yaml | 114 +++++++++++++ .../fashionclasses_macow_complementary.yaml | 149 +++++++++++++++++ ...fashionclasses_veriflow_complementary.yaml | 140 ++++++++++++++++ experiments/mnist/mnist_complementary.yaml | 111 ++++++++++++ experiments/mnist/mnist_digits.yaml | 12 +- .../mnist/mnist_digits_complementary.yaml | 148 ++++++++++++++++ .../mnist_digits_macow_complementary.yaml | 158 ++++++++++++++++++ 8 files changed, 941 insertions(+), 5 deletions(-) create mode 100644 experiments/cifar/cifar_complementary.yaml create mode 100644 experiments/fashion/fashion_complementary.yaml create mode 100644 experiments/fashion/fashionclasses_macow_complementary.yaml create mode 100644 experiments/fashion/fashionclasses_veriflow_complementary.yaml create mode 100644 experiments/mnist/mnist_complementary.yaml create mode 100644 experiments/mnist/mnist_digits_complementary.yaml create mode 100644 experiments/mnist/mnist_digits_macow_complementary.yaml diff --git a/experiments/cifar/cifar_complementary.yaml b/experiments/cifar/cifar_complementary.yaml new file mode 100644 index 0000000..f61fd20 --- /dev/null +++ b/experiments/cifar/cifar_complementary.yaml @@ -0,0 +1,114 @@ +--- +__object__: src.explib.base.ExperimentCollection +name: fashion_ablation +experiments: + - &exp_rad_logN + __object__: src.explib.hyperopt.HyperoptExperiment + name: cfair_full_radial_logN + skip: true + device: cpu + skip: true + scheduler: &scheduler + __object__: ray.tune.schedulers.ASHAScheduler + max_t: 1000000 + grace_period: 1000000 + reduction_factor: 2 + num_hyperopt_samples: &num_hyperopt_samples 1 + gpus_per_trial: &gpus_per_trial 0 + cpus_per_trial: &cpus_per_trial 1 + tuner_params: &tuner_params + metric: val_loss + mode: min + 
trial_config: + logging: + images: false + "image_shape": [28, 28] + dataset: &dataset + class: + __class__: src.explib.datasets.Cifar10Split + params: + space_to_depth_factor: 4 + dataloc: /home/faried/Projects/USFlows/data/cifar10 + + epochs: &epochs 200000 + patience: &patience 1 + batch_size: &batch_size + __eval__: tune.choice([32]) + optim_cfg: &optim + optimizer: + __class__: src.usflows.sophia.SophiaG + params: + lr: + __eval__: 1e-3 + weight_decay: 0.0 + + model_cfg: + type: + __class__: src.usflows.flows.USFlow + params: + soft_training: + __eval__: tune.choice([False]) + training_noise_prior: + __object__: pyro.distributions.Uniform + low: + __eval__: 1e-20 + high: 0.01 + prior_scale: 1.0 + coupling_blocks: + __eval__: tune.choice([10]) + lu_transform: 1 + householder: 0 + conditioner_cls: + __class__: src.usflows.networks.ConvNet2D + conditioner_args: + c_in: 48 + c_hidden: + __eval__: tune.choice([32]) + num_layers: + __eval__: tune.choice([3]) + padding: same + kernel_size: 3 + rescale_hidden: 1 + normalize_layers: + __eval__: tune.choice([True]) + gating: + __eval__: tune.choice([True]) + in_dims: [48, 8, 8] + affine_conjugation: true + nonlinearity: + __eval__: tune.choice([torch.nn.ReLU()]) + base_distribution: + __object__: pyro.distributions.Normal + loc: + __eval__: torch.zeros([48, 8, 8]).to("cpu") + scale: + __eval__: torch.ones([48, 8, 8]).to("cpu") + - &exp_normal + __overwrites__: *exp_rad_logN + name: fashion_full_laplace + skip: false + trial_config: + optim_cfg: + params: + lr: + __eval__: 1e-5 + model_cfg: + params: + lu_transform: 0 + affine_conjugation: false + base_distribution: + __object__: src.usflows.distributions.RadialDistribution + device: cpu + p: + __eval__: float("1") + loc: + __eval__: torch.zeros([48, 8, 8]).to("cpu") + norm_distribution: + __exact__: + __object__: src.usflows.distributions.LogNormal + loc: + __eval__: torch.ones([1]).to("cpu") * 6 + scale: + __eval__: torch.ones([1]).to("cpu") * .35 + device: cpu + diff 
--git a/experiments/fashion/fashion_complementary.yaml b/experiments/fashion/fashion_complementary.yaml new file mode 100644 index 0000000..05713af --- /dev/null +++ b/experiments/fashion/fashion_complementary.yaml @@ -0,0 +1,114 @@ +--- +__object__: src.explib.base.ExperimentCollection +name: fashion_ablation +experiments: + - &exp_rad_logN + __object__: src.explib.hyperopt.HyperoptExperiment + name: cfair_full_radial_logN + skip: true + device: cpu + skip: true + scheduler: &scheduler + __object__: ray.tune.schedulers.ASHAScheduler + max_t: 1000000 + grace_period: 1000000 + reduction_factor: 2 + num_hyperopt_samples: &num_hyperopt_samples 1 + gpus_per_trial: &gpus_per_trial 0 + cpus_per_trial: &cpus_per_trial 1 + tuner_params: &tuner_params + metric: val_loss + mode: min + trial_config: + logging: + images: false + "image_shape": [28, 28] + dataset: &dataset + class: + __class__: src.explib.datasets.FashionMnistSplit + params: + space_to_depth_factor: 4 + dataloc: /home/faried/Projects/USFlows/data/fashion + + epochs: &epochs 200000 + patience: &patience 1 + batch_size: &batch_size + __eval__: tune.choice([32]) + optim_cfg: &optim + optimizer: + __class__: src.usflows.sophia.SophiaG + params: + lr: + __eval__: 1e-3 + weight_decay: 0.0 + + model_cfg: + type: + __class__: src.usflows.flows.USFlow + params: + soft_training: + __eval__: tune.choice([False]) + training_noise_prior: + __object__: pyro.distributions.Uniform + low: + __eval__: 1e-20 + high: 0.01 + prior_scale: 1.0 + coupling_blocks: + __eval__: tune.choice([10]) + lu_transform: 1 + householder: 0 + conditioner_cls: + __class__: src.usflows.networks.ConvNet2D + conditioner_args: + c_in: 48 + c_hidden: + __eval__: tune.choice([32]) + num_layers: + __eval__: tune.choice([3]) + padding: same + kernel_size: 3 + rescale_hidden: 1 + normalize_layers: + __eval__: tune.choice([True]) + gating: + __eval__: tune.choice([True]) + in_dims: [48, 8, 8] + affine_conjugation: true + nonlinearity: + __eval__: 
tune.choice([torch.nn.ReLU()]) + base_distribution: + __object__: pyro.distributions.Normal + loc: + __eval__: torch.zeros([48, 8, 8]).to("cpu") + scale: + __eval__: torch.ones([48, 8, 8]).to("cpu") + + - &exp_normal + __overwrites__: *exp_rad_logN + name: fashion_full_laplace + skip: false + trial_config: + optim_cfg: + params: + lr: + __eval__: 1e-5 + model_cfg: + params: + lu_transform: 0 + affine_conjugation: false + base_distribution: + __object__: src.usflows.distributions.RadialDistribution + device: cpu + p: + __eval__: float("1") + loc: + __eval__: torch.zeros([48, 8, 8]).to("cpu") + norm_distribution: + __object__: src.usflows.distributions.LogNormal + loc: + __eval__: torch.ones([1]).to("cpu") * 6 + scale: + __eval__: torch.ones([1]).to("cpu") * .35 + device: cpu + diff --git a/experiments/fashion/fashionclasses_macow_complementary.yaml b/experiments/fashion/fashionclasses_macow_complementary.yaml new file mode 100644 index 0000000..04cdcfe --- /dev/null +++ b/experiments/fashion/fashionclasses_macow_complementary.yaml @@ -0,0 +1,149 @@ +--- +__object__: src.usflows.explib.base.ExperimentCollection +name: fashion_ablation_macow +experiments: + - &exp_rad_logN + __object__: src.usflows.explib.hyperopt.HyperoptExperiment + name: cfair_full_normal + skip: false + device: cpu + scheduler: &scheduler + __object__: ray.tune.schedulers.ASHAScheduler + max_t: 1000000 + grace_period: 1000000 + reduction_factor: 2 + num_hyperopt_samples: &num_hyperopt_samples 1 + gpus_per_trial: &gpus_per_trial 0 + cpus_per_trial: &cpus_per_trial 1 + tuner_params: &tuner_params + metric: val_loss + mode: min + trial_config: + logging: + images: false + "image_shape": [28, 28] + dataset: &dataset + class: + __class__: src.usflows.explib.datasets.FashionMnistSplit + params: + space_to_depth_factor: 4 + dataloc: /home/faried/Projects/USFlows/data/fashion + label: 0 + epochs: &epochs 200000 + patience: &patience 1 + batch_size: &batch_size + __eval__: tune.choice([32]) + optim_cfg: 
&optim + optimizer: + __class__: src.usflows.sophia.SophiaG + params: + lr: + __eval__: 1e-3 + weight_decay: 0.0 + + model_cfg: + type: + __class__: src.usflows.flows.USFlow + params: + soft_training: + __eval__: tune.choice([False]) + training_noise_prior: + __object__: pyro.distributions.Uniform + low: + __eval__: 1e-20 + high: 0.01 + prior_scale: 1.0 + coupling_blocks: + __eval__: tune.choice([10]) + lu_transform: 1 + householder: 0 + conditioner_cls: + __class__: src.usflows.networks.ConvNet2D + conditioner_args: + c_in: 16 + c_hidden: + __eval__: tune.choice([32]) + num_layers: + __eval__: tune.choice([3]) + padding: same + kernel_size: 3 + rescale_hidden: 1 + normalize_layers: + __eval__: tune.choice([True]) + gating: + __eval__: tune.choice([True]) + in_dims: [16, 7, 7] + affine_conjugation: true + nonlinearity: + __eval__: tune.choice([torch.nn.ReLU()]) + base_distribution: + __object__: src.usflows.distributions.RadialDistribution + device: cpu + p: + __eval__: float("1") + loc: + __eval__: torch.zeros([16, 7, 7]).to("cpu") + norm_distribution: + __object__: src.usflows.distributions.GammaMM + concentration: + __eval__: torch.rand([20]).to("cpu") * 75 + rate: + __eval__: torch.rand([20]).to("cpu") + mixture_weights: + __eval__: torch.ones([20]).to("cpu") / 20 + device: cpu + - __overwrites__: *exp_rad_logN + name: fashion1_radial_logN + trial_config: + dataset: + params: + label: 1 + - __overwrites__: *exp_rad_logN + name: fashion2_radial_logN + trial_config: + dataset: + params: + label: 2 + - __overwrites__: *exp_rad_logN + name: fashion3_radial_logN + trial_config: + dataset: + params: + label: 3 + - __overwrites__: *exp_rad_logN + name: fashion4_radial_logN + trial_config: + dataset: + params: + label: 4 + - __overwrites__: *exp_rad_logN + name: fashion5_radial_logN + trial_config: + dataset: + params: + label: 5 + - __overwrites__: *exp_rad_logN + name: fashion6_radial_logN + trial_config: + dataset: + params: + label: 6 + - __overwrites__: 
*exp_rad_logN + name: fashion7_radial_logN + trial_config: + dataset: + params: + label: 7 + - __overwrites__: *exp_rad_logN + name: fashion8_radial_logN + trial_config: + dataset: + params: + label: 8 + - __overwrites__: *exp_rad_logN + name: fashion9_radial_logN + trial_config: + dataset: + params: + label: 9 + \ No newline at end of file diff --git a/experiments/fashion/fashionclasses_veriflow_complementary.yaml b/experiments/fashion/fashionclasses_veriflow_complementary.yaml new file mode 100644 index 0000000..340bed9 --- /dev/null +++ b/experiments/fashion/fashionclasses_veriflow_complementary.yaml @@ -0,0 +1,140 @@ +--- +__object__: src.usflows.explib.base.ExperimentCollection +name: fashion_ablation_veriflow +experiments: + - &exp_rad_logN + __object__: src.usflows.explib.hyperopt.HyperoptExperiment + name: cfair_full_radial_logN + skip: false + device: cpu + scheduler: &scheduler + __object__: ray.tune.schedulers.ASHAScheduler + max_t: 1000000 + grace_period: 1000000 + reduction_factor: 2 + num_hyperopt_samples: &num_hyperopt_samples 1 + gpus_per_trial: &gpus_per_trial 0 + cpus_per_trial: &cpus_per_trial 1 + tuner_params: &tuner_params + metric: val_loss + mode: min + trial_config: + logging: + images: false + "image_shape": [28, 28] + dataset: &dataset + class: + __class__: src.usflows.explib.datasets.FashionMnistSplit + params: + space_to_depth_factor: 4 + dataloc: /home/faried/Projects/USFlows/data/fashion + label: 0 + epochs: &epochs 200000 + patience: &patience 1 + batch_size: &batch_size + __eval__: tune.choice([32]) + optim_cfg: &optim + optimizer: + __class__: src.usflows.sophia.SophiaG + params: + lr: + __eval__: 1e-3 + weight_decay: 0.0 + + model_cfg: + type: + __class__: src.usflows.flows.USFlow + params: + soft_training: + __eval__: tune.choice([False]) + training_noise_prior: + __object__: pyro.distributions.Uniform + low: + __eval__: 1e-20 + high: 0.01 + prior_scale: 1.0 + coupling_blocks: + __eval__: tune.choice([10]) + lu_transform: 1 + 
householder: 0 + conditioner_cls: + __class__: src.usflows.networks.ConvNet2D + conditioner_args: + c_in: 16 + c_hidden: + __eval__: tune.choice([32]) + num_layers: + __eval__: tune.choice([3]) + padding: same + kernel_size: 3 + rescale_hidden: 1 + normalize_layers: + __eval__: tune.choice([True]) + gating: + __eval__: tune.choice([True]) + in_dims: [16, 7, 7] + affine_conjugation: true + nonlinearity: + __eval__: tune.choice([torch.nn.ReLU()]) + + base_distribution: + __object__: pyro.distributions.Normal + loc: + __eval__: torch.zeros([16, 7, 7]).to("cpu") + scale: + __eval__: torch.ones([1]).to("cpu") + - __overwrites__: *exp_rad_logN + name: fashion1_radial_logN + trial_config: + dataset: + params: + label: 1 + - __overwrites__: *exp_rad_logN + name: fashion2_radial_logN + trial_config: + dataset: + params: + label: 2 + - __overwrites__: *exp_rad_logN + name: fashion3_radial_logN + trial_config: + dataset: + params: + label: 3 + - __overwrites__: *exp_rad_logN + name: fashion4_radial_logN + trial_config: + dataset: + params: + label: 4 + - __overwrites__: *exp_rad_logN + name: fashion5_radial_logN + trial_config: + dataset: + params: + label: 5 + - __overwrites__: *exp_rad_logN + name: fashion6_radial_logN + trial_config: + dataset: + params: + label: 6 + - __overwrites__: *exp_rad_logN + name: fashion7_radial_logN + trial_config: + dataset: + params: + label: 7 + - __overwrites__: *exp_rad_logN + name: fashion8_radial_logN + trial_config: + dataset: + params: + label: 8 + - __overwrites__: *exp_rad_logN + name: fashion9_radial_logN + trial_config: + dataset: + params: + label: 9 + \ No newline at end of file diff --git a/experiments/mnist/mnist_complementary.yaml b/experiments/mnist/mnist_complementary.yaml new file mode 100644 index 0000000..a88e17f --- /dev/null +++ b/experiments/mnist/mnist_complementary.yaml @@ -0,0 +1,111 @@ +--- +__object__: src.explib.base.ExperimentCollection +name: mnist_ablation +experiments: + - &exp_rad_logN + __object__: 
src.explib.hyperopt.HyperoptExperiment + name: mnist_full_radial_logN + device: cpu + skip: False + scheduler: &scheduler + __object__: ray.tune.schedulers.ASHAScheduler + max_t: 1000000 + grace_period: 1000000 + reduction_factor: 2 + num_hyperopt_samples: &num_hyperopt_samples 1 + gpus_per_trial: &gpus_per_trial 0 + cpus_per_trial: &cpus_per_trial 16 + tuner_params: &tuner_params + metric: val_loss + mode: min + trial_config: + logging: + images: false + "image_shape": [28, 28] + dataset: &dataset + class: + __class__: src.explib.datasets.MnistSplit + params: + dataloc: /home/faried/Projects/USFlows/data/mnist + space_to_depth_factor: 4 + device: cpu + epochs: 200000 + patience: 5 + batch_size: + __eval__: tune.choice([32]) + optim_cfg: + optimizer: + __class__: src.usflows.sophia.SophiaG + params: + lr: + __eval__: 1e-3 + weight_decay: 0.0 + + model_cfg: + type: + __class__: src.usflows.flows.USFlow + params: + soft_training: + __eval__: tune.choice([False]) + training_noise_prior: + __object__: pyro.distributions.Uniform + low: + __eval__: 1e-20 + high: 0.01 + prior_scale: 1.0 + coupling_blocks: + __eval__: tune.choice([15]) + lu_transform: 1 + householder: 0 + conditioner_cls: + __class__: src.usflows.networks.ConvNet2D + conditioner_args: + c_in: 16 + c_hidden: + __eval__: tune.choice([32]) + num_layers: + __eval__: tune.choice([3]) + padding: same + kernel_size: 3 + rescale_hidden: 1 + normalize_layers: + __eval__: tune.choice([True]) + gating: + __eval__: tune.choice([True]) + in_dims: [16, 7, 7] + affine_conjugation: true + nonlinearity: + __eval__: tune.choice([torch.nn.ReLU()]) + base_distribution: + __object__: pyro.distributions.Normal + loc: + __eval__: torch.zeros([16, 7, 7]).to("cpu") + scale: + __eval__: torch.ones([16, 7, 7]).to("cpu") + - &exp_normal + __overwrites__: *exp_rad_logN + name: mnist_full_MACow + skip: false + trial_config: + optim_cfg: + params: + lr: + __eval__: 1e-5 + model_cfg: + params: + lu_transform: 0 + affine_conjugation: 
false + base_distribution: + __object__: src.usflows.distributions.RadialDistribution + device: cpu + p: + __eval__: float("1") + loc: + __eval__: torch.zeros([16, 7, 7]).to("cpu") + norm_distribution: + __object__: src.usflows.distributions.LogNormal + loc: + __eval__: torch.ones([1]).to("cpu") * 6 + scale: + __eval__: torch.ones([1]).to("cpu") * .35 + device: cpu diff --git a/experiments/mnist/mnist_digits.yaml b/experiments/mnist/mnist_digits.yaml index ea6e996..c8511b9 100644 --- a/experiments/mnist/mnist_digits.yaml +++ b/experiments/mnist/mnist_digits.yaml @@ -85,11 +85,13 @@ experiments: loc: __eval__: torch.zeros([16, 7, 7]).to("cpu") norm_distribution: - __object__: src.usflows.distributions.LogNormal - loc: - __eval__: torch.ones([1]).to("cpu") * 6 - scale: - __eval__: torch.ones([1]).to("cpu") * .35 + __object__: src.usflows.distributions.GammaMM + concentration: + __eval__: torch.rand([20]).to("cpu") * 75 + rate: + __eval__: torch.rand([20]).to("cpu") + mixture_weights: + __eval__: torch.ones([20]).to("cpu") / 20 device: cpu - __overwrites__: *exp_rad_logN diff --git a/experiments/mnist/mnist_digits_complementary.yaml b/experiments/mnist/mnist_digits_complementary.yaml new file mode 100644 index 0000000..e4e17ae --- /dev/null +++ b/experiments/mnist/mnist_digits_complementary.yaml @@ -0,0 +1,148 @@ +--- +__object__: src.explib.base.ExperimentCollection +name: mnist_gigits_logN +experiments: + - &exp_rad_logN + __object__: src.explib.hyperopt.HyperoptExperiment + name: mnist0 + device: cpu + skip: false + scheduler: &scheduler + __object__: ray.tune.schedulers.ASHAScheduler + max_t: 1000000 + grace_period: 1000000 + reduction_factor: 2 + num_hyperopt_samples: &num_hyperopt_samples 1 + gpus_per_trial: &gpus_per_trial 0 + cpus_per_trial: &cpus_per_trial 16 + tuner_params: &tuner_params + metric: val_loss + mode: min + trial_config: + logging: + images: false + "image_shape": [28, 28] + dataset: &dataset + class: + __class__: src.explib.datasets.MnistSplit 
+ params: + dataloc: /home/faried/Projects/USFlows/data/mnist + space_to_depth_factor: 4 + device: cpu + digit: 0 + epochs: 200000 + patience: 5 + batch_size: + __eval__: tune.choice([32]) + optim_cfg: + optimizer: + __class__: src.usflows.sophia.SophiaG + params: + lr: + __eval__: 1e-4 + weight_decay: 0.0 + + model_cfg: + type: + __class__: src.usflows.flows.USFlow + params: + soft_training: + __eval__: tune.choice([False]) + training_noise_prior: + __object__: pyro.distributions.Uniform + low: + __eval__: 1e-20 + high: 0.01 + prior_scale: 1.0 + coupling_blocks: + __eval__: tune.choice([5]) + lu_transform: 1 + householder: 0 + conditioner_cls: + __class__: src.usflows.networks.ConvNet2D + conditioner_args: + c_in: 16 + c_hidden: + __eval__: tune.choice([32]) + num_layers: + __eval__: tune.choice([3]) + padding: same + kernel_size: 3 + rescale_hidden: 1 + normalize_layers: + __eval__: tune.choice([True]) + gating: + __eval__: tune.choice([True]) + in_dims: [16, 7, 7] + affine_conjugation: true + nonlinearity: + __eval__: tune.choice([torch.nn.ReLU()]) + base_distribution: + __object__: pyro.distributions.Normal + loc: + __eval__: torch.zeros([16, 7, 7]).to("cpu") + scale: + __eval__: torch.ones([16, 7, 7]).to("cpu") + - + __overwrites__: *exp_rad_logN + name: mnist1 + trial_config: + dataset: + params: + digit: 1 + - + __overwrites__: *exp_rad_logN + name: mnist2 + trial_config: + dataset: + params: + digit: 2 + - + __overwrites__: *exp_rad_logN + name: mnist3 + trial_config: + dataset: + params: + digit: 3 + - + __overwrites__: *exp_rad_logN + name: mnist4 + trial_config: + dataset: + params: + digit: 4 + - + __overwrites__: *exp_rad_logN + name: mnist5 + trial_config: + dataset: + params: + digit: 5 + - + __overwrites__: *exp_rad_logN + name: mnist6 + trial_config: + dataset: + params: + digit: 6 + - + __overwrites__: *exp_rad_logN + name: mnist7 + trial_config: + dataset: + params: + digit: 7 + - + __overwrites__: *exp_rad_logN + name: mnist8 + trial_config: + 
dataset: + params: + digit: 8 + - + __overwrites__: *exp_rad_logN + name: mnist9 + trial_config: + dataset: + params: + digit: 9 diff --git a/experiments/mnist/mnist_digits_macow_complementary.yaml b/experiments/mnist/mnist_digits_macow_complementary.yaml new file mode 100644 index 0000000..1ecf485 --- /dev/null +++ b/experiments/mnist/mnist_digits_macow_complementary.yaml @@ -0,0 +1,158 @@ +--- +__object__: src.explib.base.ExperimentCollection +name: mnist_gigits_logN +experiments: + - &exp_rad_logN + __object__: src.explib.hyperopt.HyperoptExperiment + name: mnist0 + device: cpu + skip: false + scheduler: &scheduler + __object__: ray.tune.schedulers.ASHAScheduler + max_t: 1000000 + grace_period: 1000000 + reduction_factor: 2 + num_hyperopt_samples: &num_hyperopt_samples 1 + gpus_per_trial: &gpus_per_trial 0 + cpus_per_trial: &cpus_per_trial 16 + tuner_params: &tuner_params + metric: val_loss + mode: min + trial_config: + logging: + images: false + "image_shape": [28, 28] + dataset: &dataset + class: + __class__: src.explib.datasets.MnistSplit + params: + dataloc: /home/faried/Projects/USFlows/data/mnist + space_to_depth_factor: 4 + device: cpu + digit: 0 + epochs: 200000 + patience: 5 + batch_size: + __eval__: tune.choice([32]) + optim_cfg: + optimizer: + __class__: src.usflows.sophia.SophiaG + params: + lr: + __eval__: 1e-4 + weight_decay: 0.0 + + model_cfg: + type: + __class__: src.usflows.flows.USFlow + params: + soft_training: + __eval__: tune.choice([False]) + training_noise_prior: + __object__: pyro.distributions.Uniform + low: + __eval__: 1e-20 + high: 0.01 + prior_scale: 1.0 + coupling_blocks: + __eval__: tune.choice([5]) + lu_transform: 0 + householder: 0 + conditioner_cls: + __class__: src.usflows.networks.ConvNet2D + conditioner_args: + c_in: 16 + c_hidden: + __eval__: tune.choice([32]) + num_layers: + __eval__: tune.choice([3]) + padding: same + kernel_size: 3 + rescale_hidden: 1 + normalize_layers: + __eval__: tune.choice([True]) + gating: + 
__eval__: tune.choice([True]) + in_dims: [16, 7, 7] + affine_conjugation: false + nonlinearity: + __eval__: tune.choice([torch.nn.ReLU()]) + base_distribution: + __object__: src.usflows.distributions.RadialDistribution + device: cpu + p: + __eval__: float("1") + loc: + __eval__: torch.zeros([16, 7, 7]).to("cpu") + norm_distribution: + __object__: src.usflows.distributions.GammaMM + concentration: + __eval__: torch.rand([20]).to("cpu") * 75 + rate: + __eval__: torch.rand([20]).to("cpu") + mixture_weights: + __eval__: torch.ones([20]).to("cpu") / 20 + device: cpu + - + __overwrites__: *exp_rad_logN + name: mnist1 + trial_config: + dataset: + params: + digit: 1 + - + __overwrites__: *exp_rad_logN + name: mnist2 + trial_config: + dataset: + params: + digit: 2 + - + __overwrites__: *exp_rad_logN + name: mnist3 + trial_config: + dataset: + params: + digit: 3 + - + __overwrites__: *exp_rad_logN + name: mnist4 + trial_config: + dataset: + params: + digit: 4 + - + __overwrites__: *exp_rad_logN + name: mnist5 + trial_config: + dataset: + params: + digit: 5 + - + __overwrites__: *exp_rad_logN + name: mnist6 + trial_config: + dataset: + params: + digit: 6 + - + __overwrites__: *exp_rad_logN + name: mnist7 + trial_config: + dataset: + params: + digit: 7 + - + __overwrites__: *exp_rad_logN + name: mnist8 + trial_config: + dataset: + params: + digit: 8 + - + __overwrites__: *exp_rad_logN + name: mnist9 + trial_config: + dataset: + params: + digit: 9 From 91c6eca7c1cfb590cb945f67740886e301f9ec26 Mon Sep 17 00:00:00 2001 From: Faried Abu Zaid Date: Wed, 8 Oct 2025 15:15:30 +0200 Subject: [PATCH 2/2] Set device cuda --- experiments/cifar/cifar_complementary.yaml | 18 ++++++++--------- .../fashion/fashion_complementary.yaml | 18 ++++++++--------- .../fashionclasses_macow_complementary.yaml | 16 +++++++-------- ...fashionclasses_veriflow_complementary.yaml | 6 +++--- experiments/mnist/mnist_complementary.yaml | 20 +++++++++---------- .../mnist/mnist_digits_complementary.yaml | 10 
+++++----- .../mnist_digits_macow_complementary.yaml | 18 ++++++++--------- 7 files changed, 53 insertions(+), 53 deletions(-) diff --git a/experiments/cifar/cifar_complementary.yaml b/experiments/cifar/cifar_complementary.yaml index f61fd20..7bc8a9a 100644 --- a/experiments/cifar/cifar_complementary.yaml +++ b/experiments/cifar/cifar_complementary.yaml @@ -6,7 +6,7 @@ experiments: __object__: src.explib.hyperopt.HyperoptExperiment name: cfair_full_radial_logN skip: true - device: cpu + device: cuda skip: true scheduler: &scheduler __object__: ray.tune.schedulers.ASHAScheduler @@ -14,7 +14,7 @@ experiments: grace_period: 1000000 reduction_factor: 2 num_hyperopt_samples: &num_hyperopt_samples 1 - gpus_per_trial: &gpus_per_trial 0 + gpus_per_trial: &gpus_per_trial 1 cpus_per_trial: &cpus_per_trial 1 tuner_params: &tuner_params metric: val_loss @@ -80,9 +80,9 @@ experiments: base_distribution: __object__: pyro.distributions.Normal loc: - __eval__: torch.zeros([48, 8, 8]).to("cpu") + __eval__: torch.zeros([48, 8, 8]).to("cuda") scale: - __eval__: torch.ones([48, 8, 8]).to("cpu") + __eval__: torch.ones([48, 8, 8]).to("cuda") - &exp_normal __overwrites__: *exp_rad_logN name: fashion_full_laplace @@ -98,17 +98,17 @@ experiments: affine_conjugation: false base_distribution: __object__: src.usflows.distributions.RadialDistribution - device: cpu + device: cuda p: __eval__: float("1") loc: - __eval__: torch.zeros([48, 8, 8]).to("cpu") + __eval__: torch.zeros([48, 8, 8]).to("cuda") norm_distribution: __exact__: __object__: src.usflows.distributions.LogNormal loc: - __eval__: torch.ones([1]).to("cpu") * 6 + __eval__: torch.ones([1]).to("cuda") * 6 scale: - __eval__: torch.ones([1]).to("cpu") * .35 - device: cpu + __eval__: torch.ones([1]).to("cuda") * .35 + device: cuda diff --git a/experiments/fashion/fashion_complementary.yaml b/experiments/fashion/fashion_complementary.yaml index 05713af..3c9faf9 100644 --- a/experiments/fashion/fashion_complementary.yaml +++ 
b/experiments/fashion/fashion_complementary.yaml @@ -6,7 +6,7 @@ experiments: __object__: src.explib.hyperopt.HyperoptExperiment name: cfair_full_radial_logN skip: true - device: cpu + device: cuda skip: true scheduler: &scheduler __object__: ray.tune.schedulers.ASHAScheduler @@ -14,7 +14,7 @@ experiments: grace_period: 1000000 reduction_factor: 2 num_hyperopt_samples: &num_hyperopt_samples 1 - gpus_per_trial: &gpus_per_trial 0 + gpus_per_trial: &gpus_per_trial 1 cpus_per_trial: &cpus_per_trial 1 tuner_params: &tuner_params metric: val_loss @@ -80,9 +80,9 @@ experiments: base_distribution: __object__: pyro.distributions.Normal loc: - __eval__: torch.zeros([48, 8, 8]).to("cpu") + __eval__: torch.zeros([48, 8, 8]).to("cuda") scale: - __eval__: torch.ones([48, 8, 8]).to("cpu") + __eval__: torch.ones([48, 8, 8]).to("cuda") - &exp_normal __overwrites__: *exp_rad_logN @@ -99,16 +99,16 @@ experiments: affine_conjugation: false base_distribution: __object__: src.usflows.distributions.RadialDistribution - device: cpu + device: cuda p: __eval__: float("1") loc: - __eval__: torch.zeros([48, 8, 8]).to("cpu") + __eval__: torch.zeros([48, 8, 8]).to("cuda") norm_distribution: __object__: src.usflows.distributions.LogNormal loc: - __eval__: torch.ones([1]).to("cpu") * 6 + __eval__: torch.ones([1]).to("cuda") * 6 scale: - __eval__: torch.ones([1]).to("cpu") * .35 - device: cpu + __eval__: torch.ones([1]).to("cuda") * .35 + device: cuda diff --git a/experiments/fashion/fashionclasses_macow_complementary.yaml b/experiments/fashion/fashionclasses_macow_complementary.yaml index 04cdcfe..617be19 100644 --- a/experiments/fashion/fashionclasses_macow_complementary.yaml +++ b/experiments/fashion/fashionclasses_macow_complementary.yaml @@ -6,14 +6,14 @@ experiments: __object__: src.usflows.explib.hyperopt.HyperoptExperiment name: cfair_full_normal skip: false - device: cpu + device: cuda scheduler: &scheduler __object__: ray.tune.schedulers.ASHAScheduler max_t: 1000000 grace_period: 1000000 
reduction_factor: 2 num_hyperopt_samples: &num_hyperopt_samples 1 - gpus_per_trial: &gpus_per_trial 0 + gpus_per_trial: &gpus_per_trial 1 cpus_per_trial: &cpus_per_trial 1 tuner_params: &tuner_params metric: val_loss @@ -78,20 +78,20 @@ experiments: __eval__: tune.choice([torch.nn.ReLU()]) base_distribution: __object__: src.usflows.distributions.RadialDistribution - device: cpu + device: cuda p: __eval__: float("1") loc: - __eval__: torch.zeros([16, 7, 7]).to("cpu") + __eval__: torch.zeros([16, 7, 7]).to("cuda") norm_distribution: __object__: src.usflows.distributions.GammaMM concentration: - __eval__: torch.rand([20]).to("cpu") * 75 + __eval__: torch.rand([20]).to("cuda") * 75 rate: - __eval__: torch.rand([20]).to("cpu") + __eval__: torch.rand([20]).to("cuda") mixture_weights: - __eval__: torch.ones([20]).to("cpu") / 20 - device: cpu + __eval__: torch.ones([20]).to("cuda") / 20 + device: cuda - __overwrites__: *exp_rad_logN name: fashion1_radial_logN trial_config: diff --git a/experiments/fashion/fashionclasses_veriflow_complementary.yaml b/experiments/fashion/fashionclasses_veriflow_complementary.yaml index 340bed9..482a871 100644 --- a/experiments/fashion/fashionclasses_veriflow_complementary.yaml +++ b/experiments/fashion/fashionclasses_veriflow_complementary.yaml @@ -6,7 +6,7 @@ experiments: __object__: src.usflows.explib.hyperopt.HyperoptExperiment name: cfair_full_radial_logN skip: false - device: cpu + device: cuda scheduler: &scheduler __object__: ray.tune.schedulers.ASHAScheduler max_t: 1000000 @@ -80,9 +80,9 @@ experiments: base_distribution: __object__: pyro.distributions.Normal loc: - __eval__: torch.zeros([16, 7, 7]).to("cpu") + __eval__: torch.zeros([16, 7, 7]).to("cuda") scale: - __eval__: torch.ones([1]).to("cpu") + __eval__: torch.ones([1]).to("cuda") - __overwrites__: *exp_rad_logN name: fashion1_radial_logN trial_config: diff --git a/experiments/mnist/mnist_complementary.yaml b/experiments/mnist/mnist_complementary.yaml index a88e17f..7f9e569 
100644 --- a/experiments/mnist/mnist_complementary.yaml +++ b/experiments/mnist/mnist_complementary.yaml @@ -5,7 +5,7 @@ experiments: - &exp_rad_logN __object__: src.explib.hyperopt.HyperoptExperiment name: mnist_full_radial_logN - device: cpu + device: cuda skip: False scheduler: &scheduler __object__: ray.tune.schedulers.ASHAScheduler @@ -13,7 +13,7 @@ experiments: grace_period: 1000000 reduction_factor: 2 num_hyperopt_samples: &num_hyperopt_samples 1 - gpus_per_trial: &gpus_per_trial 0 + gpus_per_trial: &gpus_per_trial 1 cpus_per_trial: &cpus_per_trial 16 tuner_params: &tuner_params metric: val_loss @@ -28,7 +28,7 @@ experiments: params: dataloc: /home/faried/Projects/USFlows/data/mnist space_to_depth_factor: 4 - device: cpu + device: cuda epochs: 200000 patience: 5 batch_size: @@ -79,9 +79,9 @@ experiments: base_distribution: __object__: pyro.distributions.Normal loc: - __eval__: torch.zeros([16, 7, 7]).to("cpu") + __eval__: torch.zeros([16, 7, 7]).to("cuda") scale: - __eval__: torch.ones([16, 7, 7]).to("cpu") + __eval__: torch.ones([16, 7, 7]).to("cuda") - &exp_normal __overwrites__: *exp_rad_logN name: mnist_full_MACow @@ -97,15 +97,15 @@ experiments: affine_conjugation: false base_distribution: __object__: src.usflows.distributions.RadialDistribution - device: cpu + device: cuda p: __eval__: float("1") loc: - __eval__: torch.zeros([16, 7, 7]).to("cpu") + __eval__: torch.zeros([16, 7, 7]).to("cuda") norm_distribution: __object__: src.usflows.distributions.LogNormal loc: - __eval__: torch.ones([1]).to("cpu") * 6 + __eval__: torch.ones([1]).to("cuda") * 6 scale: - __eval__: torch.ones([1]).to("cpu") * .35 - device: cpu + __eval__: torch.ones([1]).to("cuda") * .35 + device: cuda diff --git a/experiments/mnist/mnist_digits_complementary.yaml b/experiments/mnist/mnist_digits_complementary.yaml index e4e17ae..63acf9b 100644 --- a/experiments/mnist/mnist_digits_complementary.yaml +++ b/experiments/mnist/mnist_digits_complementary.yaml @@ -5,7 +5,7 @@ experiments: - 
&exp_rad_logN __object__: src.explib.hyperopt.HyperoptExperiment name: mnist0 - device: cpu + device: cuda skip: false scheduler: &scheduler __object__: ray.tune.schedulers.ASHAScheduler @@ -13,7 +13,7 @@ experiments: grace_period: 1000000 reduction_factor: 2 num_hyperopt_samples: &num_hyperopt_samples 1 - gpus_per_trial: &gpus_per_trial 0 + gpus_per_trial: &gpus_per_trial 1 cpus_per_trial: &cpus_per_trial 16 tuner_params: &tuner_params metric: val_loss @@ -28,7 +28,7 @@ experiments: params: dataloc: /home/faried/Projects/USFlows/data/mnist space_to_depth_factor: 4 - device: cpu + device: cuda digit: 0 epochs: 200000 patience: 5 @@ -80,9 +80,9 @@ experiments: base_distribution: __object__: pyro.distributions.Normal loc: - __eval__: torch.zeros([16, 7, 7]).to("cpu") + __eval__: torch.zeros([16, 7, 7]).to("cuda") scale: - __eval__: torch.ones([16, 7, 7]).to("cpu") + __eval__: torch.ones([16, 7, 7]).to("cuda") - __overwrites__: *exp_rad_logN name: mnist1 diff --git a/experiments/mnist/mnist_digits_macow_complementary.yaml b/experiments/mnist/mnist_digits_macow_complementary.yaml index 1ecf485..aa15da4 100644 --- a/experiments/mnist/mnist_digits_macow_complementary.yaml +++ b/experiments/mnist/mnist_digits_macow_complementary.yaml @@ -5,7 +5,7 @@ experiments: - &exp_rad_logN __object__: src.explib.hyperopt.HyperoptExperiment name: mnist0 - device: cpu + device: cuda skip: false scheduler: &scheduler __object__: ray.tune.schedulers.ASHAScheduler @@ -13,7 +13,7 @@ experiments: grace_period: 1000000 reduction_factor: 2 num_hyperopt_samples: &num_hyperopt_samples 1 - gpus_per_trial: &gpus_per_trial 0 + gpus_per_trial: &gpus_per_trial 1 cpus_per_trial: &cpus_per_trial 16 tuner_params: &tuner_params metric: val_loss @@ -28,7 +28,7 @@ experiments: params: dataloc: /home/faried/Projects/USFlows/data/mnist space_to_depth_factor: 4 - device: cpu + device: cuda digit: 0 epochs: 200000 patience: 5 @@ -79,20 +79,20 @@ experiments: __eval__: tune.choice([torch.nn.ReLU()]) 
base_distribution: __object__: src.usflows.distributions.RadialDistribution - device: cpu + device: cuda p: __eval__: float("1") loc: - __eval__: torch.zeros([16, 7, 7]).to("cpu") + __eval__: torch.zeros([16, 7, 7]).to("cuda") norm_distribution: __object__: src.usflows.distributions.GammaMM concentration: - __eval__: torch.rand([20]).to("cpu") * 75 + __eval__: torch.rand([20]).to("cuda") * 75 rate: - __eval__: torch.rand([20]).to("cpu") + __eval__: torch.rand([20]).to("cuda") mixture_weights: - __eval__: torch.ones([20]).to("cpu") / 20 - device: cpu + __eval__: torch.ones([20]).to("cuda") / 20 + device: cuda - __overwrites__: *exp_rad_logN name: mnist1