From 902c118bf1d1078200c7714a8c8ef8778be7cdcc Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 15 Nov 2021 16:04:32 -0500 Subject: [PATCH 001/150] init deterministic.py for segmenter loader using scenic loader of cityscapes --- experimental/cityscapes/README.md | 6 + experimental/cityscapes/deterministic.py | 124 ++++++++++++++++++ .../imagenet21k_segmenter_cityscapes.py | 120 +++++++++++++++++ experimental/cityscapes/train_utils.py | 62 +++++++++ 4 files changed, 312 insertions(+) create mode 100644 experimental/cityscapes/README.md create mode 100644 experimental/cityscapes/deterministic.py create mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py create mode 100644 experimental/cityscapes/train_utils.py diff --git a/experimental/cityscapes/README.md b/experimental/cityscapes/README.md new file mode 100644 index 000000000..a4a216f87 --- /dev/null +++ b/experimental/cityscapes/README.md @@ -0,0 +1,6 @@ + +# Log + +[] include loading cityscapes dataset + +[] include deterministic training for segmenter diff --git a/experimental/cityscapes/deterministic.py b/experimental/cityscapes/deterministic.py new file mode 100644 index 000000000..0bb458845 --- /dev/null +++ b/experimental/cityscapes/deterministic.py @@ -0,0 +1,124 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Train vit model on cityscapes. 
+ +Step 1: aim to train model on cityscapes for 1 step +# Runs with + +""" + +from functools import partial # pylint: disable=g-importing-member so standard +import itertools +import multiprocessing +import numbers +import os +#%% +from absl import app +from absl import flags +from absl import logging +from clu import metric_writers +from clu import parameter_overview +from clu import periodic_actions +from clu import preprocess_spec +#%% +import flax +import flax.jax_utils as flax_utils +import jax +import jax.numpy as jnp +from ml_collections.config_flags import config_flags +import numpy as np +import robustness_metrics as rm +#%% +import tensorflow as tf +import train_utils # local file import + +fewshot = None + +#%% +config_flags.DEFINE_config_file( + 'config', None, 'Training configuration.', lock_config=True) +flags.DEFINE_string('output_dir', default=None, help='Work unit directory.') +flags.DEFINE_integer( + 'num_cores', default=None, help='Unused. How many devices being used.') +flags.DEFINE_boolean( + 'use_gpu', default=None, help='Unused. Whether or not running on GPU.') +flags.DEFINE_string('tpu', None, + 'Unused. Name of the TPU. Only used if use_gpu is False.') +flags.DEFINE_string('dataset_service_address', None, + 'Address of the tf.data service') +FLAGS = flags.FLAGS + + +#%% +def main(config, output_dir): + + seed = config.get('rng_seed', 0) + rng = jax.random.PRNGKey(seed) + tf.random.set_seed(seed) + + """ + if config.get('data_dir'): + logging.info('data_dir=%s', config.data_dir) + logging.info('Output dir: %s', output_dir) + + save_checkpoint_path = None + if config.get('checkpoint_steps'): + gfile.makedirs(output_dir) + save_checkpoint_path = os.path.join(output_dir, 'checkpoint.npz') + + # Create an asynchronous multi-metric writer. + writer = metric_writers.create_default_writer( + output_dir, just_logging=jax.process_index() > 0) + + # The pool is used to perform misc operations such as logging in async way. 
+ pool = multiprocessing.pool.ThreadPool() + + def write_note(note): + if jax.host_id() == 0: + logging.info('NOTE: %s', note) + write_note('Initializing...') + # Verify settings to make sure no checkpoints are accidentally missed. + if config.get('keep_checkpoint_steps'): + assert config.get('checkpoint_steps'), 'Specify `checkpoint_steps`.' + assert config.keep_checkpoint_steps % config.checkpoint_steps == 0, ( + f'`keep_checkpoint_steps` ({config.checkpoint_steps}) should be' + f'divisible by `checkpoint_steps ({config.checkpoint_steps}).`') + """ + # Train dataset configs + data_rng, rng = jax.random.split(rng) + + # Load dataset + dataset = train_utils.get_dataset( + config, data_rng, dataset_service_address=FLAGS.dataset_service_address) + + return + + +if __name__ == '__main__': + # Adds jax flags to the program. + jax.config.config_with_absl() + + # TODO(dusenberrymw): Refactor `main` such that there is a `train_eval` + # function that returns values for tests and does not directly access flags, + # and then have `main` return None. + + def _main(unused_argv): + config = FLAGS.config + output_dir = FLAGS.output_dir + main(config, output_dir) + + app.run(_main) # Ignore the returned values from `main`. \ No newline at end of file diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py new file mode 100644 index 000000000..d39c8bd2b --- /dev/null +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py @@ -0,0 +1,120 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. + +""" +# pylint: enable=line-too-long + +import ml_collections +#import get_fewshot # local file import + + +def get_config(): + """Config for training a patch-transformer on JFT.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub' + + + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = (1024, 2048) + + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + config.args = {} + + # training + config.rng_seed = 0 + config.batch_size = 1 + + return config + + +def get_config_deprecated(): + """Config for training a patch-transformer on JFT.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub' + + + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = (1024, 2048) + + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + #config.dataset_name = 'cityscapes' + #config.val_split = 'full[:102400]' + #config.train_split = 'full[102400:]' + #config.train_split = 'full[10:]' + #config.num_classes = 34 + #config.init_head_bias = -10.0 + + config.trial = 0 + config.batch_size = 1 + config.num_epochs = 1 + + # what do these mean? 
+ #pp_common = '|value_range(-1, 1)' + #config.pp_train = 'decode_jpeg_and_inception_crop(224)|flip_lr' + pp_common + #config.pp_train += f'|onehot({config.num_classes}, on=0.9999, off=0.0001)' + #config.pp_eval = 'decode|resize_small(256)|central_crop(224)' + pp_common + #config.pp_eval += f'|onehot({config.num_classes})' + #config.shuffle_buffer_size = 250_000 # Per host, so small-ish is ok. + config.shuffle_buffer_size = 10 + config.log_training_steps = 1 #000 + config.log_eval_steps = 1 #0000 + # NOTE: eval is very fast O(seconds) so it's fine to run it often. + config.checkpoint_steps = 1#7250 + config.checkpoint_timeout = 10 + + # Model section + config.model = ml_collections.ConfigDict() + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = [16, 16] + config.model.hidden_size = 768 + config.model.transformer = ml_collections.ConfigDict() + config.model.transformer.attention_dropout_rate = 0. + config.model.transformer.dropout_rate = 0.1 + config.model.transformer.mlp_dim = 3072 + config.model.transformer.num_heads = 12 + config.model.transformer.num_layers = 12 + config.model.classifier = 'token' # Or 'gap' + config.model.representation_size = 768 + # Optimizer section + config.optim_name = 'Adam' + config.optim = ml_collections.ConfigDict() + config.optim.weight_decay = 0.03 + + # TODO(lbeyer): make a mini-language like preprocessings. + config.lr = ml_collections.ConfigDict() + config.lr.base = 0.001 # LR has to be lower for larger models! 
+ config.lr.warmup_steps = 10_000 + config.lr.decay_type = 'linear' + config.lr.linear_end = 1e-5 + + config.args = {} + return config + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/cityscapes/train_utils.py b/experimental/cityscapes/train_utils.py new file mode 100644 index 000000000..1569ea0cc --- /dev/null +++ b/experimental/cityscapes/train_utils.py @@ -0,0 +1,62 @@ + +""" +Load cityscapes dataset +""" +from absl import logging +from typing import Any, Callable, Dict, Tuple, Sequence, Optional, Mapping, Union +import jax +import jax.numpy as jnp +import ml_collections + +PRNGKey = jnp.ndarray + +from scenic.dataset_lib import datasets + + +def get_dataset(config: ml_collections.ConfigDict, data_rng: PRNGKey, *, + dataset_service_address: Optional[str] = None): + """Creates dataset from config. + Edited from + https://github.com/google-research/scenic/blob/c3ae6d7b5dc829fafe204a92522a5983959561a0/scenic/train_lib/train_utils.py#L145 + """ + device_count = jax.device_count() + logging.info('device_count: %d', device_count) + logging.info('num_hosts : %d', jax.process_count()) + logging.info('host_id : %d', jax.process_index()) + + dataset_builder = datasets.get_dataset(config.dataset_name) + + batch_size = config.batch_size + if batch_size % device_count > 0: + raise ValueError(f'Batch size ({batch_size}) must be divisible by the ' + f'number of devices ({device_count})') + + eval_batch_size = config.get('eval_batch_size', batch_size) + if eval_batch_size % device_count > 0: + raise ValueError(f'Eval batch size ({eval_batch_size}) must be divisible ' + f'by the number of devices ({device_count})') + + local_batch_size = batch_size // jax.process_count() + eval_local_batch_size = eval_batch_size // jax.process_count() + device_batch_size = batch_size // device_count + logging.info('local_batch_size : %d', local_batch_size) + logging.info('device_batch_size : %d', device_batch_size) + + shuffle_seed = config.get('shuffle_seed', None) 
+ if dataset_service_address and shuffle_seed is not None: + raise ValueError('Using dataset service with a random seed causes each ' + 'worker to produce exactly the same data. Add ' + 'config.shuffle_seed = None to your config if you want ' + 'to run with dataset service.') + + dataset = dataset_builder( + batch_size=local_batch_size, + eval_batch_size=eval_local_batch_size, + num_shards=jax.local_device_count(), + dtype_str=config.data_dtype_str, + rng=data_rng, + shuffle_seed=shuffle_seed, + dataset_configs=config.get('dataset_configs'), + dataset_service_address=dataset_service_address) + + return dataset \ No newline at end of file From 82bd6c921a68a91aa28286215036d7752eb9f807 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 15 Nov 2021 17:02:57 -0500 Subject: [PATCH 002/150] fix bug in segmenter: vit+ backbone classifier was not inherited --- uncertainty_baselines/models/segmenter.py | 2 +- uncertainty_baselines/models/segmenter_test.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/uncertainty_baselines/models/segmenter.py b/uncertainty_baselines/models/segmenter.py index 45f7c7e7f..a895fc7d8 100644 --- a/uncertainty_baselines/models/segmenter.py +++ b/uncertainty_baselines/models/segmenter.py @@ -336,7 +336,7 @@ def __call__(self, x: jnp.ndarray, *, train: bool, debug: bool = False): hidden_size=self.backbone_configs.hidden_size, dropout_rate=self.backbone_configs.dropout_rate, attention_dropout_rate=self.backbone_configs.attention_dropout_rate, - classifier='gap', + classifier=self.backbone_configs.classifier, name='backbone')( x, train=train) else: diff --git a/uncertainty_baselines/models/segmenter_test.py b/uncertainty_baselines/models/segmenter_test.py index 1cab608e0..028c3d878 100644 --- a/uncertainty_baselines/models/segmenter_test.py +++ b/uncertainty_baselines/models/segmenter_test.py @@ -44,6 +44,7 @@ def test_segmenter_transformer(self, num_classes, hidden_size, img_h, img_w): config.backbone_configs.mlp_dim = 2 
config.backbone_configs.num_heads = 1 config.backbone_configs.num_layers = 1 + config.backbone_configs.classifier = 'token' config.decoder_configs = ml_collections.ConfigDict() config.decoder_configs.type = 'linear' From fe1f68d7ef1a618cd21b07463b32d6089039f7ab Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 15 Nov 2021 21:55:41 -0500 Subject: [PATCH 003/150] include data and models in deterministic.py --- experimental/cityscapes/custom_models.py | 38 +++++ experimental/cityscapes/deterministic.py | 161 ++++++++++++++---- .../imagenet21k_segmenter_cityscapes.py | 134 +++++++-------- experimental/cityscapes/train_utils.py | 62 ------- .../models/segmenter_test.py | 3 +- 5 files changed, 231 insertions(+), 167 deletions(-) create mode 100644 experimental/cityscapes/custom_models.py delete mode 100644 experimental/cityscapes/train_utils.py diff --git a/experimental/cityscapes/custom_models.py b/experimental/cityscapes/custom_models.py new file mode 100644 index 000000000..0b3824733 --- /dev/null +++ b/experimental/cityscapes/custom_models.py @@ -0,0 +1,38 @@ +""" +Custom models which allow for model inheritance +""" + +from scenic.model_lib.base_models.segmentation_model import SegmentationModel +from uncertainty_baselines.models.segmenter import SegVit +import ml_collections + +class SegmenterSegmentationModel(SegmentationModel): + """Segmenter model for segmentation task.""" + + def build_flax_model(self): + """ + return SegVit( + num_classes=self.dataset_meta_data['num_classes'], + patches=self.config.model.get('patches', {}), + backbone_configs=self.config.model.get('backbone_configs', {}), + decoder_configs=self.config.model.get('decoder_configs', {})) + """ + return SegVit( + num_classes=self.num_classes, + patches=self.config.patches, + backbone_configs=self.config.backbone_configs, + decoder_configs=self.config.model.decoder_configs) + + def default_flax_model_config(self): + raise NotImplementedError() + """ + return ml_collections.ConfigDict({ + 
'model': + dict( + num_classes=19, + patches=patches, + block_size=(64, 128, 256, 512), + data_dtype_str='float32') + }) + """ + diff --git a/experimental/cityscapes/deterministic.py b/experimental/cityscapes/deterministic.py index 0bb458845..00b45d26c 100644 --- a/experimental/cityscapes/deterministic.py +++ b/experimental/cityscapes/deterministic.py @@ -20,12 +20,14 @@ # Runs with """ +import functools from functools import partial # pylint: disable=g-importing-member so standard import itertools import multiprocessing import numbers import os +import sys #%% from absl import app from absl import flags @@ -44,10 +46,16 @@ import robustness_metrics as rm #%% import tensorflow as tf -import train_utils # local file import +#import train_utils # local file import +import uncertainty_baselines as ub -fewshot = None +# scenic dependencies for debugging +from scenic.train_lib import lr_schedules +from scenic.train_lib import optimizers +from scenic.train_lib import train_utils +from flax import jax_utils +import custom_models #%% config_flags.DEFINE_config_file( 'config', None, 'Training configuration.', lock_config=True) @@ -63,48 +71,141 @@ FLAGS = flags.FLAGS +def write_note(note): + if jax.process_index() == 0: + logging.info('NOTE: %s', note) #%% def main(config, output_dir): - seed = config.get('rng_seed', 0) rng = jax.random.PRNGKey(seed) tf.random.set_seed(seed) - """ - if config.get('data_dir'): - logging.info('data_dir=%s', config.data_dir) - logging.info('Output dir: %s', output_dir) - - save_checkpoint_path = None - if config.get('checkpoint_steps'): - gfile.makedirs(output_dir) - save_checkpoint_path = os.path.join(output_dir, 'checkpoint.npz') - - # Create an asynchronous multi-metric writer. - writer = metric_writers.create_default_writer( - output_dir, just_logging=jax.process_index() > 0) - - # The pool is used to perform misc operations such as logging in async way. 
- pool = multiprocessing.pool.ThreadPool() - - def write_note(note): - if jax.host_id() == 0: - logging.info('NOTE: %s', note) write_note('Initializing...') - # Verify settings to make sure no checkpoints are accidentally missed. - if config.get('keep_checkpoint_steps'): - assert config.get('checkpoint_steps'), 'Specify `checkpoint_steps`.' - assert config.keep_checkpoint_steps % config.checkpoint_steps == 0, ( - f'`keep_checkpoint_steps` ({config.checkpoint_steps}) should be' - f'divisible by `checkpoint_steps ({config.checkpoint_steps}).`') - """ + # Train dataset configs data_rng, rng = jax.random.split(rng) + # ---------------------- # Load dataset + # ---------------------- + # set resource limit to debug in mac osx (see https://github.com/tensorflow/datasets/issues/1441) + if jax.process_index() == 0 and sys.platform == 'darwin': + import resource + low, high = resource.getrlimit(resource.RLIMIT_NOFILE) + resource.setrlimit(resource.RLIMIT_NOFILE, (high, high)) + write_note('Loading dataset...') + dataset = train_utils.get_dataset( config, data_rng, dataset_service_address=FLAGS.dataset_service_address) + # ---------------------- + # Define model + # ---------------------- + write_note('Creating model...') + model = ub.models.segmenter_transformer( + num_classes=config.num_classes, + patches=config.patches, + backbone_configs=config.backbone_configs, + decoder_configs=config.decoder_configs + ) + # ---------------------- + # Initialize model + # ---------------------- + # Here we follow the scenic/model_lib/base_models/segmentation_model.py + from scenic.train_lib.train_utils import initialize_model + """ + #TODO(kellybuchanan): update local_batch_size according to train_utils + local_batch_size = 1 + @partial(jax.jit, backend='cpu') + def init(rng): + #image_size = tuple(train_ds.element_spec['image'].shape[2:]) + image_size = config.dataset_configs.target_size + (3,) + logging.info('image_size = %s', image_size) + dummy_input = jnp.zeros((local_batch_size,) 
+ image_size, jnp.float32) + params = flax.core.unfreeze(model.init(rng, dummy_input, + train=False))['params'] + + return params + rng, init_rng = jax.random.split(rng) + params_cpu = init(init_rng) + """ + rng, init_rng = jax.random.split(rng) + (params, model_state, num_trainable_params, + gflops) = train_utils.initialize_model( + model_def=model, #.flax_model, + input_spec=[(dataset.meta_data['input_shape'], + dataset.meta_data.get('input_dtype', jnp.float32))], + config=config, + rngs=init_rng) + + # ---------------------- + # Create optimizer + # ---------------------- + """ + # Load the optimizer from flax. + opt_name = config.get('optimizer') + write_note(f'Initializing {opt_name} optimizer...') + opt_def = getattr(flax.optim, opt_name)(**config.get('optimizer_configs', {})) + + # We jit this, such that the arrays that are created are created on the same + # device as the input is, in this case the CPU. Else they'd be on device[0]. + opt_cpu = jax.jit(opt_def.create)(params_cpu) + """ + optimizer = jax.jit( + optimizers.get_optimizer(config).create, backend='cpu')( + params) + rng, train_rng = jax.random.split(rng) + train_state = train_utils.TrainState( + global_step=0, + optimizer=optimizer, + model_state=model_state, + rng=train_rng, + accum_train_time=0) + + start_step = train_state.global_step + + if config.checkpoint: + train_state, start_step = train_utils.restore_checkpoint( + workdir, train_state) + # Replicate the optimzier, state, and rng. + train_state = jax_utils.replicate(train_state) + del params # Do not keep a copy of the initial params. + + # Calculate the total number of training steps. + total_steps, steps_per_epoch = train_utils.get_num_training_steps( + config, dataset.meta_data) + # Get learning rate scheduler. 
+ learning_rate_fn = lr_schedules.get_learning_rate_fn(config) + + # --- STOP --- + # TODO: debug train_step in scenic/train_lib/segmentation_trainer.py + # import pdb; pdb.set_trace() + + train_step_pmapped = jax.pmap( + functools.partial( + train_step, + flax_model=model,#.flax_model, + learning_rate_fn=learning_rate_fn, + loss_fn=model.loss_function, + metrics_fn=model.get_metrics_fn('train'), + config=config, + debug=config.debug_train), + axis_name='batch', + # We can donate both buffers of train_state and train_batch. + donate_argnums=(0, 1), + ) + + + #dummy_input = jnp.zeros((local_batch_size,) + image_size, jnp.float32) + + #inputs = jnp.ones([num_examples, img_h, img_w, 3], jnp.float32) + #model = ub.models.segmenter_transformer(**config) + #key = jax.random.PRNGKey(0) + #variables = model.init(key, inputs, train=False) + #logits, outputs = model.apply(variables, inputs, train=False) + #variables = model.init(rng, inputs, train=False) + #logits, outputs = model.apply(variables, inputs, train=False) + return diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py index d39c8bd2b..4ac8aff8c 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py @@ -22,6 +22,9 @@ import ml_collections #import get_fewshot # local file import +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = True + def get_config(): """Config for training a patch-transformer on JFT.""" @@ -29,92 +32,75 @@ def get_config(): config.experiment_name = 'cityscapes_segvit_ub' - config.dataset_name = 'cityscapes' config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = (1024, 2048) - - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - config.args = {} + config.dataset_configs.target_size = (512, 512) + + # config 
following scenic + config.num_classes = 19 + + config.patches = ml_collections.ConfigDict() + config.patches.size = [4, 4] + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.classifier = 'gap' + + if DEBUG: + config.backbone_configs.mlp_dim = 2 + config.backbone_configs.num_heads = 1 + config.backbone_configs.num_layers = 1 + config.backbone_configs.hidden_size = 1 + else: + config.backbone_configs.mlp_dim = 3072 + config.backbone_configs.num_heads = 12 + config.backbone_configs.num_layers = 12 + config.backbone_configs.hidden_size = 768 + + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' # training - config.rng_seed = 0 + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + num_training_epochs = 1 # ml_collections.FieldReference(100) + config.num_training_epochs = num_training_epochs config.batch_size = 1 - - return config - - -def get_config_deprecated(): - """Config for training a patch-transformer on JFT.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub' - - - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = (1024, 2048) - + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 # model and data dtype config.model_dtype_str = 'float32' config.data_dtype_str = 'float32' - #config.dataset_name = 'cityscapes' - #config.val_split = 'full[:102400]' - #config.train_split = 'full[102400:]' - #config.train_split = 'full[10:]' - #config.num_classes = 34 - #config.init_head_bias = -10.0 - - config.trial = 0 - config.batch_size = 1 - config.num_epochs = 1 - - # what do these mean? - #pp_common = '|value_range(-1, 1)' - #config.pp_train = 'decode_jpeg_and_inception_crop(224)|flip_lr' + pp_common - #config.pp_train += f'|onehot({config.num_classes}, on=0.9999, off=0.0001)' - #config.pp_eval = 'decode|resize_small(256)|central_crop(224)' + pp_common - #config.pp_eval += f'|onehot({config.num_classes})' - #config.shuffle_buffer_size = 250_000 # Per host, so small-ish is ok. - config.shuffle_buffer_size = 10 - config.log_training_steps = 1 #000 - config.log_eval_steps = 1 #0000 - # NOTE: eval is very fast O(seconds) so it's fine to run it often. - config.checkpoint_steps = 1#7250 - config.checkpoint_timeout = 10 - - # Model section - config.model = ml_collections.ConfigDict() - config.model.patches = ml_collections.ConfigDict() - config.model.patches.size = [16, 16] - config.model.hidden_size = 768 - config.model.transformer = ml_collections.ConfigDict() - config.model.transformer.attention_dropout_rate = 0. 
- config.model.transformer.dropout_rate = 0.1 - config.model.transformer.mlp_dim = 3072 - config.model.transformer.num_heads = 12 - config.model.transformer.num_layers = 12 - config.model.classifier = 'token' # Or 'gap' - config.model.representation_size = 768 - # Optimizer section - config.optim_name = 'Adam' - config.optim = ml_collections.ConfigDict() - config.optim.weight_decay = 0.03 - - # TODO(lbeyer): make a mini-language like preprocessings. - config.lr = ml_collections.ConfigDict() - config.lr.base = 0.001 # LR has to be lower for larger models! - config.lr.warmup_steps = 10_000 - config.lr.decay_type = 'linear' - config.lr.linear_end = 1e-5 + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = False # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + + # extra config.args = {} + return config + def get_sweep(hyper): return hyper.product([]) diff --git a/experimental/cityscapes/train_utils.py b/experimental/cityscapes/train_utils.py deleted file mode 100644 index 1569ea0cc..000000000 --- a/experimental/cityscapes/train_utils.py +++ /dev/null @@ -1,62 +0,0 @@ - -""" -Load cityscapes dataset -""" -from absl import logging -from typing import Any, Callable, Dict, Tuple, Sequence, Optional, Mapping, Union -import jax -import jax.numpy as jnp -import ml_collections - -PRNGKey = jnp.ndarray - -from scenic.dataset_lib import datasets - - -def get_dataset(config: ml_collections.ConfigDict, data_rng: PRNGKey, *, - dataset_service_address: Optional[str] = None): - """Creates dataset from config. 
- Edited from - https://github.com/google-research/scenic/blob/c3ae6d7b5dc829fafe204a92522a5983959561a0/scenic/train_lib/train_utils.py#L145 - """ - device_count = jax.device_count() - logging.info('device_count: %d', device_count) - logging.info('num_hosts : %d', jax.process_count()) - logging.info('host_id : %d', jax.process_index()) - - dataset_builder = datasets.get_dataset(config.dataset_name) - - batch_size = config.batch_size - if batch_size % device_count > 0: - raise ValueError(f'Batch size ({batch_size}) must be divisible by the ' - f'number of devices ({device_count})') - - eval_batch_size = config.get('eval_batch_size', batch_size) - if eval_batch_size % device_count > 0: - raise ValueError(f'Eval batch size ({eval_batch_size}) must be divisible ' - f'by the number of devices ({device_count})') - - local_batch_size = batch_size // jax.process_count() - eval_local_batch_size = eval_batch_size // jax.process_count() - device_batch_size = batch_size // device_count - logging.info('local_batch_size : %d', local_batch_size) - logging.info('device_batch_size : %d', device_batch_size) - - shuffle_seed = config.get('shuffle_seed', None) - if dataset_service_address and shuffle_seed is not None: - raise ValueError('Using dataset service with a random seed causes each ' - 'worker to produce exactly the same data. 
Add ' - 'config.shuffle_seed = None to your config if you want ' - 'to run with dataset service.') - - dataset = dataset_builder( - batch_size=local_batch_size, - eval_batch_size=eval_local_batch_size, - num_shards=jax.local_device_count(), - dtype_str=config.data_dtype_str, - rng=data_rng, - shuffle_seed=shuffle_seed, - dataset_configs=config.get('dataset_configs'), - dataset_service_address=dataset_service_address) - - return dataset \ No newline at end of file diff --git a/uncertainty_baselines/models/segmenter_test.py b/uncertainty_baselines/models/segmenter_test.py index 028c3d878..b489ee3b8 100644 --- a/uncertainty_baselines/models/segmenter_test.py +++ b/uncertainty_baselines/models/segmenter_test.py @@ -44,7 +44,8 @@ def test_segmenter_transformer(self, num_classes, hidden_size, img_h, img_w): config.backbone_configs.mlp_dim = 2 config.backbone_configs.num_heads = 1 config.backbone_configs.num_layers = 1 - config.backbone_configs.classifier = 'token' + # TODO(kellybuchanan): include 'token' test + config.backbone_configs.classifier = 'gap' config.decoder_configs = ml_collections.ConfigDict() config.decoder_configs.type = 'linear' From 27e2cf06964105de8045812c64cdfe5b2d9c73d5 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 16 Nov 2021 00:07:42 -0500 Subject: [PATCH 004/150] include segmenter data loader and trainer using scenic's infra --- experimental/cityscapes/README.md | 5 +- experimental/cityscapes/custom_models.py | 20 +- .../cityscapes/custom_segmentation_trainer.py | 434 ++++++++++++++++++ experimental/cityscapes/deterministic.py | 230 ++++------ .../imagenet21k_segmenter_cityscapes.py | 5 +- 5 files changed, 524 insertions(+), 170 deletions(-) create mode 100644 experimental/cityscapes/custom_segmentation_trainer.py diff --git a/experimental/cityscapes/README.md b/experimental/cityscapes/README.md index a4a216f87..284ae1dd3 100644 --- a/experimental/cityscapes/README.md +++ b/experimental/cityscapes/README.md @@ -1,6 +1,5 @@ # Log -[] 
include loading cityscapes dataset - -[] include deterministic training for segmenter +[x] include loading cityscapes dataset +[x] include deterministic training for segmenter diff --git a/experimental/cityscapes/custom_models.py b/experimental/cityscapes/custom_models.py index 0b3824733..323d47cd6 100644 --- a/experimental/cityscapes/custom_models.py +++ b/experimental/cityscapes/custom_models.py @@ -10,29 +10,11 @@ class SegmenterSegmentationModel(SegmentationModel): """Segmenter model for segmentation task.""" def build_flax_model(self): - """ return SegVit( num_classes=self.dataset_meta_data['num_classes'], - patches=self.config.model.get('patches', {}), - backbone_configs=self.config.model.get('backbone_configs', {}), - decoder_configs=self.config.model.get('decoder_configs', {})) - """ - return SegVit( - num_classes=self.num_classes, patches=self.config.patches, backbone_configs=self.config.backbone_configs, - decoder_configs=self.config.model.decoder_configs) + decoder_configs=self.config.decoder_configs) def default_flax_model_config(self): raise NotImplementedError() - """ - return ml_collections.ConfigDict({ - 'model': - dict( - num_classes=19, - patches=patches, - block_size=(64, 128, 256, 512), - data_dtype_str='float32') - }) - """ - diff --git a/experimental/cityscapes/custom_segmentation_trainer.py b/experimental/cityscapes/custom_segmentation_trainer.py new file mode 100644 index 000000000..439ecb606 --- /dev/null +++ b/experimental/cityscapes/custom_segmentation_trainer.py @@ -0,0 +1,434 @@ +""" +Custom segmentation_trainer.py + +# cannot import train_step, eval_step due to tuple segmenter output in ub implementation +Minor changes to account for ub models which ouput a tuple (logits, dict) +""" + +import functools +from typing import Any, Callable, Dict, Tuple, Optional, Type + +import flax.linen as nn +import jax +import jax.numpy as jnp +import jax.profiler +import ml_collections +import numpy as np +from absl import logging +from clu import 
metric_writers +from clu import periodic_actions +from flax import jax_utils +from jax.experimental.optimizers import clip_grads + +from scenic.dataset_lib import dataset_utils +from scenic.model_lib.base_models import base_model +from scenic.train_lib import lr_schedules +from scenic.train_lib import optimizers +from scenic.train_lib import train_utils + +# from scenic.train_lib.segmentation_trainer import train_step, eval_step, _draw_side_by_side +Batch = Dict[str, jnp.ndarray] +MetricFn = Callable[[jnp.ndarray, Dict[str, jnp.ndarray]], + Dict[str, Tuple[float, int]]] +LossFn = Callable[[jnp.ndarray, Batch, Optional[jnp.ndarray]], float] + +from scenic.train_lib.segmentation_trainer import _draw_side_by_side, get_confusion_matrix + +def train_step( + *, + flax_model: nn.Module, + train_state: train_utils.TrainState, + batch: Batch, + learning_rate_fn: Callable[[int], float], + loss_fn: LossFn, + metrics_fn: MetricFn, + config: ml_collections.ConfigDict, + debug: Optional[bool] = False +) -> Tuple[train_utils.TrainState, Dict[str, Tuple[float, int]], float, + jnp.ndarray]: + """Runs a single step of training. + + Given the state of the training and a batch of data, computes + the loss and updates the parameters of the model. + + Note that in this code, the buffers of the first (train_state) and second + (batch) arguments are donated to the computation. + + Args: + flax_model: A Flax model. + train_state: The state of training including the current + global_step, model_state, rng, and optimizer. The buffer of this argument + can be donated to the computation. + batch: A single batch of data. The buffer of this argument can be donated to + the computation. + learning_rate_fn: learning rate scheduler which give the global_step + generates the learning rate. + loss_fn: A loss function that given logits, a batch, and parameters of the + model calculates the loss. 
+ metrics_fn: A metrics function that given logits and batch of data, + calculates the metrics as well as the loss. + config: Configurations of the experiment. + debug: Whether the debug mode is enabled during training. + `debug=True` enables model specific logging/storing some values using + jax.host_callback. + + Returns: + Updated state of training, computed metrics, learning rate, and predictions + for logging. + """ + new_rng, rng = jax.random.split(train_state.rng) + # Bind the rng to the host/device we are on. + dropout_rng = train_utils.bind_rng_to_host_device( + rng, axis_name='batch', bind_to='device') + + def training_loss_fn(params): + variables = {'params': params, **train_state.model_state} + (logits, _), new_model_state = flax_model.apply( + variables, + batch['inputs'], + mutable=['batch_stats'], + train=True, + rngs={'dropout': dropout_rng}, + debug=debug) + loss = loss_fn(logits, batch, variables['params']) + return loss, (new_model_state, logits) + + compute_gradient_fn = jax.value_and_grad(training_loss_fn, has_aux=True) + step = train_state.global_step + lr = learning_rate_fn(step) + (train_cost, + (new_model_state, + logits)), grad = compute_gradient_fn(train_state.optimizer.target) + + del train_cost + # Re-use same axis_name as in the call to `pmap(...train_step...)` below. + grad = jax.lax.pmean(grad, axis_name='batch') + + if config.get('max_grad_norm', None) is not None: + grad = clip_grads(grad, config.max_grad_norm) + + new_optimizer = train_state.optimizer.apply_gradient(grad, learning_rate=lr) + + # Explicit weight decay, if necessary. 
+ if config.get('explicit_weight_decay', None) is not None: + new_optimizer = new_optimizer.replace( + target=optimizers.tree_map_with_names( + functools.partial( + optimizers.decay_weight_fn, + lr=lr, + decay=config.explicit_weight_decay), + new_optimizer.target, + match_name_fn=lambda name: 'kernel' in name)) + + metrics = metrics_fn(logits, batch) + new_train_state = train_state.replace( # pytype: disable=attribute-error + global_step=step + 1, + optimizer=new_optimizer, + model_state=new_model_state, + rng=new_rng) + return new_train_state, metrics, lr, jnp.argmax(logits, axis=-1) + + +def eval_step( + *, + flax_model: nn.Module, + train_state: train_utils.TrainState, + batch: Batch, + metrics_fn: MetricFn, + debug: Optional[bool] = False +) -> Tuple[Batch, jnp.ndarray, Dict[str, Tuple[float, int]], jnp.ndarray]: + """Runs a single step of training. + + Note that in this code, the buffer of the second argument (batch) is donated + to the computation. + + Assumed API of metrics_fn is: + ```metrics = metrics_fn(logits, batch) + where batch is yielded by the batch iterator, and metrics is a dictionary + mapping metric name to a vector of per example measurements. eval_step will + aggregate (by summing) all per example measurements and divide by the + aggregated normalizers. For each given metric we compute: + 1/N sum_{b in batch_iter} metric(b), where N is the sum of normalizer + over all batches. + + Args: + flax_model: A Flax model. + train_state: TrainState, the state of training including the current + global_step, model_state, rng, and optimizer. The buffer of this argument + can be donated to the computation. + batch: A single batch of data. a metrics function, that given logits and + batch of data, calculates the metrics as well as the loss. + metrics_fn: A metrics function, that given logits and batch of data, + calculates the metrics as well as the loss. + debug: Whether the debug mode is enabled during evaluation. 
+ `debug=True` enables model specific logging/storing some values using + jax.host_callback. + + Returns: + Batch, predictions and calculated metrics. + """ + variables = { + 'params': train_state.optimizer.target, + **train_state.model_state + } + logits, _ = flax_model.apply( + variables, batch['inputs'], train=False, mutable=False, debug=debug) + metrics = metrics_fn(logits, batch) + + confusion_matrix = get_confusion_matrix( + labels=batch['label'], logits=logits, batch_mask=batch['batch_mask']) + + # Collect predictions and batches from all hosts. + predictions = jnp.argmax(logits, axis=-1) + predictions = jax.lax.all_gather(predictions, 'batch') + batch = jax.lax.all_gather(batch, 'batch') + confusion_matrix = jax.lax.all_gather(confusion_matrix, 'batch') + + return batch, predictions, metrics, confusion_matrix + + +def train( + *, + rng: jnp.ndarray, + config: ml_collections.ConfigDict, + model_cls: Type[base_model.BaseModel], + dataset: dataset_utils.Dataset, + workdir: str, + writer: metric_writers.MetricWriter, +) -> Tuple[train_utils.TrainState, Dict[str, Any], Dict[str, Any]]: + """Main training loop lives in this function. + + Given the model class and dataset, it prepares the items needed to run the + training, including the TrainState. + + Args: + rng: Jax rng key. + config: Configurations of the experiment. + model_cls: Model class; A model has a flax_module, a loss_fn, and a + metrics_fn associated with it. + dataset: The dataset that has train_iter, eval_iter, meta_data, and + optionally, test_iter. + workdir: Directory for checkpointing. + writer: CLU metrics writer instance. + + Returns: + train_state that has the state of training (including current + global_step, model_state, rng, and the optimizer), train_summary + and eval_summary which are dict of metrics. These outputs are used for + regression testing. 
+ + Timeline: + - Updated from scenic.train_lib.segmentation_trainer.train + """ + lead_host = jax.process_index() == 0 + # Build the loss_fn, metrics, and flax_model. + model = model_cls(config, dataset.meta_data) + + # Initialize model. + rng, init_rng = jax.random.split(rng) + (params, model_state, num_trainable_params, + gflops) = train_utils.initialize_model( + model_def=model.flax_model, + input_spec=[(dataset.meta_data['input_shape'], + dataset.meta_data.get('input_dtype', jnp.float32))], + config=config, + rngs=init_rng) + + # Create optimizer. + # We jit this, such that the arrays that are created are created on the same + # device as the input is, in this case the CPU. Else they'd be on device[0]. + optimizer = jax.jit( + optimizers.get_optimizer(config).create, backend='cpu')( + params) + rng, train_rng = jax.random.split(rng) + train_state = train_utils.TrainState( + global_step=0, + optimizer=optimizer, + model_state=model_state, + rng=train_rng, + accum_train_time=0) + start_step = train_state.global_step + if config.checkpoint: + train_state, start_step = train_utils.restore_checkpoint( + workdir, train_state) + # Replicate the optimzier, state, and rng. + train_state = jax_utils.replicate(train_state) + del params # Do not keep a copy of the initial params. + + # Calculate the total number of training steps. + total_steps, steps_per_epoch = train_utils.get_num_training_steps( + config, dataset.meta_data) + # Get learning rate scheduler. + learning_rate_fn = lr_schedules.get_learning_rate_fn(config) + + train_step_pmapped = jax.pmap( + functools.partial( + train_step, + flax_model=model.flax_model, + learning_rate_fn=learning_rate_fn, + loss_fn=model.loss_function, + metrics_fn=model.get_metrics_fn('train'), + config=config, + debug=config.debug_train), + axis_name='batch', + # We can donate both buffers of train_state and train_batch. 
+ donate_argnums=(0, 1), + ) + + ############### EVALUATION CODE ################# + + eval_step_pmapped = jax.pmap( + functools.partial( + eval_step, + flax_model=model.flax_model, + metrics_fn=model.get_metrics_fn('validation'), + debug=config.debug_eval), + axis_name='batch', + # We can donate the eval_batch's buffer. + ) + + # Ceil rounding such that we include the last incomplete batch. + total_eval_steps = int( + np.ceil(dataset.meta_data['num_eval_examples'] / config.batch_size)) + steps_per_eval = config.get('steps_per_eval') or total_eval_steps + + def evaluate(train_state: train_utils.TrainState, + step: int) -> Dict[str, Any]: + eval_metrics = [] + eval_all_confusion_mats = [] + # Sync model state across replicas. + train_state = train_utils.sync_model_state_across_replicas(train_state) + def to_cpu(x): + return jax.device_get(dataset_utils.unshard(jax_utils.unreplicate(x))) + for _ in range(steps_per_eval): + eval_batch = next(dataset.valid_iter) + e_batch, e_predictions, e_metrics, confusion_matrix = eval_step_pmapped( + train_state=train_state, batch=eval_batch) + eval_metrics.append(train_utils.unreplicate_and_get(e_metrics)) + # Evaluate global metrics on one of the hosts (lead_host), but given + # intermediate values collected from all hosts. + if lead_host and global_metrics_fn is not None: + # Collect data to be sent for computing global metrics. 
+ eval_all_confusion_mats.append(to_cpu(confusion_matrix)) + + eval_global_metrics_summary = {} + if lead_host and global_metrics_fn is not None: + eval_global_metrics_summary = global_metrics_fn(eval_all_confusion_mats, + dataset.meta_data) + + ############### LOG EVAL SUMMARY ############### + eval_summary = train_utils.log_eval_summary( + step=step, + eval_metrics=eval_metrics, + extra_eval_summary=eval_global_metrics_summary, + writer=writer) + # Visualize val predictions for one batch: + if lead_host: + images = _draw_side_by_side(to_cpu(e_batch), to_cpu(e_predictions)) + example_viz = { + f'val/example_{i}': image[None, ...] for i, image in enumerate(images) + } + writer.write_images(step, example_viz) + + writer.flush() + del eval_metrics + return eval_summary + + log_eval_steps = config.get('log_eval_steps') or steps_per_epoch + if not log_eval_steps: + raise ValueError("'log_eval_steps' should be specified in the config.") + log_summary_steps = config.get('log_summary_steps') or log_eval_steps + checkpoint_steps = config.get('checkpoint_steps') or log_eval_steps + + train_metrics, extra_training_logs = [], [] + train_summary, eval_summary = None, None + global_metrics_fn = model.get_global_metrics_fn() # pytype: disable=attribute-error + + chrono = train_utils.Chrono( + first_step=start_step, + total_steps=total_steps, + steps_per_epoch=steps_per_epoch, + global_bs=config.batch_size, + accum_train_time=int(jax_utils.unreplicate(train_state.accum_train_time))) + + logging.info('Starting training loop at step %d.', start_step + 1) + report_progress = periodic_actions.ReportProgress( + num_train_steps=total_steps, writer=writer) + hooks = [report_progress] + if config.get('xprof', True) and lead_host: + hooks.append(periodic_actions.Profile(num_profile_steps=5, logdir=workdir)) + + if start_step == 0: + step0_log = {'num_trainable_params': num_trainable_params} + if gflops: + step0_log['gflops'] = gflops + writer.write_scalars(1, step0_log) + + for step in 
range(start_step + 1, total_steps + 1): + with jax.profiler.StepTraceContext('train', sfLtep_num=step): + train_batch = next(dataset.train_iter) + train_state, t_metrics, lr, train_predictions = train_step_pmapped( + train_state=train_state, batch=train_batch) + # This will accumulate metrics in TPU memory up to the point that we log + # them. This is no problem for small metrics but may be a problem for + # large (e.g. segmentation) metrics. An alternative is to set + # `log_summary_steps` to a small number, or to use + # `train_utils.unreplicate_and_get` here instead of right before writing + # summaries, but that means in each step, we have data transfer between + # tpu and host, which might slow down the training. + train_metrics.append(t_metrics) + # Additional training logs: learning rate: + extra_training_logs.append({'learning_rate': lr}) + + for h in hooks: + h(step) + chrono.pause() # Below are once-in-a-while ops -> pause. + if step % log_summary_steps == 0 or (step == total_steps): + ############### LOG TRAIN SUMMARY ############### + if lead_host: + chrono.tick(step, writer=writer) + # Visualize segmentations using side-by-side gt-pred images: + images = _draw_side_by_side( + jax.device_get(dataset_utils.unshard(train_batch)), + jax.device_get(dataset_utils.unshard(train_predictions))) + example_viz = { + f'train/example_{i}': image[None, ...] + for i, image in enumerate(images) + } + writer.write_images(step, example_viz) + + train_summary = train_utils.log_train_summary( + step=step, + train_metrics=jax.tree_map(train_utils.unreplicate_and_get, + train_metrics), + extra_training_logs=jax.tree_map(train_utils.unreplicate_and_get, + extra_training_logs), + writer=writer) + # Reset metric accumulation for next evaluation cycle. + train_metrics, extra_training_logs = [], [] + + if (step % log_eval_steps == 0) or (step == total_steps): + with report_progress.timed('eval'): + # Sync model state across replicas (in case of having model state, e.g. 
+ # batch statistic when using batch norm). + train_state = train_utils.sync_model_state_across_replicas(train_state) + eval_summary = evaluate(train_state, step) + + if ((step % checkpoint_steps == 0 and step > 0) or + (step == total_steps)) and config.checkpoint: + ################### CHECK POINTING ########################## + with report_progress.timed('checkpoint'): + # Sync model state across replicas. + train_state = train_utils.sync_model_state_across_replicas(train_state) + if lead_host: + train_state.replace( # pytype: disable=attribute-error + accum_train_time=chrono.accum_train_time) + train_utils.save_checkpoint(workdir, train_state) + + chrono.resume() # Un-pause now. + + # Wait until computations are done before exiting. + jax.random.normal(jax.random.PRNGKey(0), ()).block_until_ready() + # Return the train and eval summary after last step for regresesion testing. + return train_state, train_summary, eval_summary + diff --git a/experimental/cityscapes/deterministic.py b/experimental/cityscapes/deterministic.py index 00b45d26c..76bd9c1f0 100644 --- a/experimental/cityscapes/deterministic.py +++ b/experimental/cityscapes/deterministic.py @@ -20,42 +20,28 @@ # Runs with """ -import functools -from functools import partial # pylint: disable=g-importing-member so standard -import itertools -import multiprocessing -import numbers import os import sys -#%% + +# %% +import jax +# %% +import tensorflow as tf +# %% from absl import app from absl import flags from absl import logging -from clu import metric_writers -from clu import parameter_overview -from clu import periodic_actions -from clu import preprocess_spec -#%% -import flax -import flax.jax_utils as flax_utils -import jax -import jax.numpy as jnp from ml_collections.config_flags import config_flags -import numpy as np -import robustness_metrics as rm -#%% -import tensorflow as tf -#import train_utils # local file import -import uncertainty_baselines as ub +from tensorflow.io import gfile +import 
custom_models +import custom_segmentation_trainer # scenic dependencies for debugging -from scenic.train_lib import lr_schedules -from scenic.train_lib import optimizers from scenic.train_lib import train_utils -from flax import jax_utils -import custom_models +# import train_utils # local file import + #%% config_flags.DEFINE_config_file( 'config', None, 'Training configuration.', lock_config=True) @@ -74,138 +60,88 @@ def write_note(note): if jax.process_index() == 0: logging.info('NOTE: %s', note) -#%% + + +from clu import metric_writers + + +def run(config, workdir): + """Prepares model, and dataset for training. + + This creates summary directories, summary writers, model definition, and + builds datasets to be sent to the main training script. + + Args: + config: ConfigDict; Hyper parameters. + workdir: string; Root directory for the experiment. + + Returns: + The outputs of trainer.train(), which are train_state, train_summary, and + eval_summary. + """ + lead_host = jax.process_index() == 0 + # set up the train_dir and log_dir + gfile.makedirs(workdir) + #workdir = os.path.join(workdir, 'trial') + #gfile.makedirs(workdir) + + summary_writer = None + if lead_host and config.write_summary: + tensorboard_dir = os.path.join(workdir, 'tb_summaries') + gfile.makedirs(tensorboard_dir) + # summary_writer = tensorboard.SummaryWriter(tensorboard_dir) + summary_writer = metric_writers.SummaryWriter(tensorboard_dir) + + device_count = jax.device_count() + logging.info('device_count: %d', device_count) + logging.info('num_hosts : %d', jax.process_count()) + logging.info('host_id : %d', jax.process_index()) + + rng = jax.random.PRNGKey(config.rng_seed) + logging.info('rng: %s', rng) + + model_cls = custom_models.SegmenterSegmentationModel + + # ---------------------- + # Load dataset + # ---------------------- + data_rng, rng = jax.random.split(rng) + # set resource limit to debug in mac osx (see https://github.com/tensorflow/datasets/issues/1441) + if 
jax.process_index() == 0 and sys.platform == 'darwin': + import resource + low, high = resource.getrlimit(resource.RLIMIT_NOFILE) + resource.setrlimit(resource.RLIMIT_NOFILE, (high, high)) + write_note('Loading dataset...') + + # TODO: update num_classes + dataset = train_utils.get_dataset( + config, data_rng, dataset_service_address=FLAGS.dataset_service_address) + + return rng, model_cls, dataset, config, workdir, summary_writer + + def main(config, output_dir): + + print('config') + print(config) seed = config.get('rng_seed', 0) rng = jax.random.PRNGKey(seed) tf.random.set_seed(seed) - write_note('Initializing...') - - # Train dataset configs - data_rng, rng = jax.random.split(rng) - - # ---------------------- - # Load dataset - # ---------------------- - # set resource limit to debug in mac osx (see https://github.com/tensorflow/datasets/issues/1441) - if jax.process_index() == 0 and sys.platform == 'darwin': - import resource - low, high = resource.getrlimit(resource.RLIMIT_NOFILE) - resource.setrlimit(resource.RLIMIT_NOFILE, (high, high)) - write_note('Loading dataset...') - - dataset = train_utils.get_dataset( - config, data_rng, dataset_service_address=FLAGS.dataset_service_address) + print('workdir ', output_dir) + rng, model_cls, dataset, config, workdir, summary_writer = run(config, output_dir) + print('workdir ', workdir) # ---------------------- - # Define model - # ---------------------- - write_note('Creating model...') - model = ub.models.segmenter_transformer( - num_classes=config.num_classes, - patches=config.patches, - backbone_configs=config.backbone_configs, - decoder_configs=config.decoder_configs - ) - # ---------------------- - # Initialize model + # Train function # ---------------------- - # Here we follow the scenic/model_lib/base_models/segmentation_model.py - from scenic.train_lib.train_utils import initialize_model - """ - #TODO(kellybuchanan): update local_batch_size according to train_utils - local_batch_size = 1 - @partial(jax.jit, 
backend='cpu') - def init(rng): - #image_size = tuple(train_ds.element_spec['image'].shape[2:]) - image_size = config.dataset_configs.target_size + (3,) - logging.info('image_size = %s', image_size) - dummy_input = jnp.zeros((local_batch_size,) + image_size, jnp.float32) - params = flax.core.unfreeze(model.init(rng, dummy_input, - train=False))['params'] - - return params - rng, init_rng = jax.random.split(rng) - params_cpu = init(init_rng) - """ - rng, init_rng = jax.random.split(rng) - (params, model_state, num_trainable_params, - gflops) = train_utils.initialize_model( - model_def=model, #.flax_model, - input_spec=[(dataset.meta_data['input_shape'], - dataset.meta_data.get('input_dtype', jnp.float32))], - config=config, - rngs=init_rng) + train_fn = custom_segmentation_trainer.train - # ---------------------- - # Create optimizer - # ---------------------- - """ - # Load the optimizer from flax. - opt_name = config.get('optimizer') - write_note(f'Initializing {opt_name} optimizer...') - opt_def = getattr(flax.optim, opt_name)(**config.get('optimizer_configs', {})) - - # We jit this, such that the arrays that are created are created on the same - # device as the input is, in this case the CPU. Else they'd be on device[0]. - opt_cpu = jax.jit(opt_def.create)(params_cpu) - """ - optimizer = jax.jit( - optimizers.get_optimizer(config).create, backend='cpu')( - params) - rng, train_rng = jax.random.split(rng) - train_state = train_utils.TrainState( - global_step=0, - optimizer=optimizer, - model_state=model_state, - rng=train_rng, - accum_train_time=0) - - start_step = train_state.global_step - - if config.checkpoint: - train_state, start_step = train_utils.restore_checkpoint( - workdir, train_state) - # Replicate the optimzier, state, and rng. - train_state = jax_utils.replicate(train_state) - del params # Do not keep a copy of the initial params. - - # Calculate the total number of training steps. 
- total_steps, steps_per_epoch = train_utils.get_num_training_steps( - config, dataset.meta_data) - # Get learning rate scheduler. - learning_rate_fn = lr_schedules.get_learning_rate_fn(config) - - # --- STOP --- - # TODO: debug train_step in scenic/train_lib/segmentation_trainer.py - # import pdb; pdb.set_trace() - - train_step_pmapped = jax.pmap( - functools.partial( - train_step, - flax_model=model,#.flax_model, - learning_rate_fn=learning_rate_fn, - loss_fn=model.loss_function, - metrics_fn=model.get_metrics_fn('train'), - config=config, - debug=config.debug_train), - axis_name='batch', - # We can donate both buffers of train_state and train_batch. - donate_argnums=(0, 1), - ) - - - #dummy_input = jnp.zeros((local_batch_size,) + image_size, jnp.float32) - - #inputs = jnp.ones([num_examples, img_h, img_w, 3], jnp.float32) - #model = ub.models.segmenter_transformer(**config) - #key = jax.random.PRNGKey(0) - #variables = model.init(key, inputs, train=False) - #logits, outputs = model.apply(variables, inputs, train=False) - #variables = model.init(rng, inputs, train=False) - #logits, outputs = model.apply(variables, inputs, train=False) + train_state, train_summary, eval_summary = train_fn(rng=rng, model_cls=model_cls, dataset=dataset, + config=config, + workdir=output_dir, writer=summary_writer) + print(train_summary) return diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py index 4ac8aff8c..d44331cc8 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py @@ -92,9 +92,12 @@ def get_config(): config.write_summary = True # write TB and/or XM summary config.write_xm_measurements = True # write XM measurements #config.xprof = False # Profile using xprof - config.checkpoint = False # do checkpointing + config.checkpoint = True # do checkpointing 
config.checkpoint_steps = 5 * steps_per_epoch + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = 200 # extra config.args = {} From 49db9dd079e71c0a1c1184e75113899c7f7a2aea Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 16 Nov 2021 12:44:56 -0500 Subject: [PATCH 005/150] include flags to debug on mac using only a subset of training data available --- experimental/cityscapes/custom_segmentation_trainer.py | 1 + .../cityscapes/experiments/imagenet21k_segmenter_cityscapes.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/experimental/cityscapes/custom_segmentation_trainer.py b/experimental/cityscapes/custom_segmentation_trainer.py index 439ecb606..4732ca940 100644 --- a/experimental/cityscapes/custom_segmentation_trainer.py +++ b/experimental/cityscapes/custom_segmentation_trainer.py @@ -26,6 +26,7 @@ from scenic.train_lib import optimizers from scenic.train_lib import train_utils +# instead of importing we use local functions # from scenic.train_lib.segmentation_trainer import train_step, eval_step, _draw_side_by_side Batch = Dict[str, jnp.ndarray] MetricFn = Callable[[jnp.ndarray, Dict[str, jnp.ndarray]], diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py index d44331cc8..c134f4f95 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py @@ -35,6 +35,9 @@ def get_config(): config.dataset_name = 'cityscapes' config.dataset_configs = ml_collections.ConfigDict() config.dataset_configs.target_size = (512, 512) + # flags to debug scenic on mac + config.dataset_configs.number_train_examples_debug = 10 + config.dataset_configs.number_eval_examples_debug = 10 # config following scenic config.num_classes = 19 From e56df3b4b104d35acc4aaed2e8a4a904ce1c4fcd Mon Sep 17 
00:00:00 2001 From: Kelly Buchanan Date: Tue, 16 Nov 2021 13:12:14 -0500 Subject: [PATCH 006/150] this commit includes a trainable implementation segmenter on cityscapes for 10 train samples for 1 epoch --- experimental/cityscapes/custom_segmentation_trainer.py | 2 +- .../experiments/imagenet21k_segmenter_cityscapes.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/experimental/cityscapes/custom_segmentation_trainer.py b/experimental/cityscapes/custom_segmentation_trainer.py index 4732ca940..27af92bd8 100644 --- a/experimental/cityscapes/custom_segmentation_trainer.py +++ b/experimental/cityscapes/custom_segmentation_trainer.py @@ -172,7 +172,7 @@ def eval_step( 'params': train_state.optimizer.target, **train_state.model_state } - logits, _ = flax_model.apply( + (logits, _) = flax_model.apply( variables, batch['inputs'], train=False, mutable=False, debug=debug) metrics = metrics_fn(logits, batch) diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py index c134f4f95..66cc8742f 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py @@ -78,11 +78,13 @@ def get_config(): config.focal_loss_gamma = 0.0 # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + #steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + steps_per_epoch = 10 // config.batch_size + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
config.lr_configs = ml_collections.ConfigDict() config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' config.lr_configs.warmup_steps = 1 * steps_per_epoch config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch config.lr_configs.base_learning_rate = 1e-4 @@ -100,7 +102,7 @@ def get_config(): config.debug_train = True # debug mode during training config.debug_eval = True # debug mode during eval - config.log_eval_steps = 200 + config.log_eval_steps = 1 #200 # extra config.args = {} From 08aad99510901174e1815a40f75a34474eaa4ffb Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 16 Nov 2021 19:01:00 -0500 Subject: [PATCH 007/150] include configs to debug model on gcp vms --- .../imagenet21k_segmenter_cityscapes1.py | 144 ++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py new file mode 100644 index 000000000..60c870fba --- /dev/null +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py @@ -0,0 +1,144 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. + +""" +# pylint: enable=line-too-long + +import ml_collections +#import get_fewshot # local file import + +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 1 + +# debug on mac +if DEBUG == 1: + batch_size = 1 + number_train_examples_debug = 10 + number_eval_examples_debug = 10 + num_training_epochs = 1 # ml_collections.FieldReference(100) + log_eval_steps = 1 + + mlp_dim = 2 + num_heads = 1 + num_layers = 1 + hidden_size = 1 +# debug on v3-8: 1 epoch/16 samples/small vit +elif DEBUG == 2: + batch_size=8 + number_train_examples_debug = 16 + number_eval_examples_debug = 16 + num_training_epochs = 1 # ml_collections.FieldReference(100) + log_eval_steps = 1 + + mlp_dim = 2 + num_heads = 1 + num_layers = 1 + hidden_size = 1 +# debug on v3-8: 1 epoch/16 samples/regular vit +elif DEBUG == 3: + batch_size=8 + number_train_examples_debug = 16 + number_eval_examples_debug = 16 + num_training_epochs = 1 # ml_collections.FieldReference(100) + log_eval_steps = 1 + + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 + + +def get_config(): + """Config for training a patch-transformer on JFT.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub' + + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = (512, 512) + # flags to debug scenic on mac + config.dataset_configs.number_train_examples_debug = number_train_examples_debug + config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + config.num_classes = 19 + + config.patches = ml_collections.ConfigDict() + config.patches.size = [4, 4] + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. 
+ config.backbone_configs.classifier = 'gap' + + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + config.backbone_configs.hidden_size = hidden_size + + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + #steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = log_eval_steps #200 + + # extra + config.args = {} + + return config + + +def get_sweep(hyper): + return hyper.product([]) From 85f3a6f175079d261c780f56e90d9331bb633390 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: 
Tue, 16 Nov 2021 19:41:13 -0500 Subject: [PATCH 008/150] update patch size --- .../imagenet21k_segmenter_cityscapes1.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py index 60c870fba..49399b459 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py @@ -24,7 +24,9 @@ _CITYSCAPES_TRAIN_SIZE = 2975 DEBUG = 1 +STRIDE = 4 +target_size=(512, 512) # debug on mac if DEBUG == 1: batch_size = 1 @@ -61,6 +63,19 @@ num_heads = 12 num_layers = 12 hidden_size = 768 +elif DEBUG == 4: + target_size =(128, 128) + STRIDE=16 + batch_size=8 + number_train_examples_debug = 16 + number_eval_examples_debug = 16 + num_training_epochs = 1 # ml_collections.FieldReference(100) + log_eval_steps = 1 + + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 def get_config(): @@ -71,7 +86,7 @@ def get_config(): config.dataset_name = 'cityscapes' config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = (512, 512) + config.dataset_configs.target_size = target_size # flags to debug scenic on mac config.dataset_configs.number_train_examples_debug = number_train_examples_debug config.dataset_configs.number_eval_examples_debug = number_train_examples_debug @@ -80,7 +95,7 @@ def get_config(): config.num_classes = 19 config.patches = ml_collections.ConfigDict() - config.patches.size = [4, 4] + config.patches.size = (STRIDE, STRIDE) config.backbone_configs = ml_collections.ConfigDict() config.backbone_configs.type = 'vit' From e33696ec70624198e0fd6fe023b632b1ace83062 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 16 Nov 2021 19:59:10 -0500 Subject: [PATCH 009/150] add config to train model on all data for 1 epoch --- 
.../imagenet21k_segmenter_cityscapes2.py | 115 ++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes2.py diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes2.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes2.py new file mode 100644 index 000000000..dd62f4b24 --- /dev/null +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes2.py @@ -0,0 +1,115 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+ +train model on all data for 1 epoch +""" +# pylint: enable=line-too-long + +import ml_collections +#import get_fewshot # local file import + +_CITYSCAPES_TRAIN_SIZE = 2975 +#STRIDE = 4 + +target_size =(128, 128) +STRIDE=16 +batch_size=8 +num_training_epochs = 1 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 + + +def get_config(): + """Config for training a patch-transformer on JFT.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub' + + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + + # config following scenic + config.num_classes = 19 + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.classifier = 'gap' + + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + config.backbone_configs.hidden_size = hidden_size + + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = log_eval_steps #200 + + # extra + config.args = {} + + return config + + +def get_sweep(hyper): + return hyper.product([]) From 2b81aef2a6e760984f8d5f845e4bd3fdcf6ee489 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 16 Nov 2021 23:48:49 -0500 Subject: [PATCH 010/150] add init_from flag to allow model to load pretrained checkpoints --- .../cityscapes/custom_segmentation_trainer.py | 10 ++ .../imagenet21k_segmenter_cityscapes11.py | 163 ++++++++++++++++++ 2 files changed, 173 insertions(+) create mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py diff --git a/experimental/cityscapes/custom_segmentation_trainer.py b/experimental/cityscapes/custom_segmentation_trainer.py index 27af92bd8..1ba0e4959 100644 --- a/experimental/cityscapes/custom_segmentation_trainer.py +++ b/experimental/cityscapes/custom_segmentation_trainer.py @@ -252,6 +252,16 @@ def train( if config.checkpoint: train_state, start_step = train_utils.restore_checkpoint( workdir, train_state) + + #import pdb; pdb.set_trace() + # Load pretrained model + if (start_step == 0 # Which means "no" checkpoint is 
restored! + and config.get('init_from') is not None): + raise NotImplementedError("") + elif start_step == 0: + logging.info('Training completely from scratch.' + 'Not restoring from any checkpoint.') + # Replicate the optimzier, state, and rng. train_state = jax_utils.replicate(train_state) del params # Do not keep a copy of the initial params. diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py new file mode 100644 index 000000000..9c007be3a --- /dev/null +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py @@ -0,0 +1,163 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+ +init from checkpoints +""" +# pylint: enable=line-too-long + +import ml_collections +#import get_fewshot # local file import + +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 1 +STRIDE = 4 + +target_size=(512, 512) +# debug on mac +if DEBUG == 1: + batch_size = 1 + number_train_examples_debug = 10 + number_eval_examples_debug = 10 + num_training_epochs = 1 # ml_collections.FieldReference(100) + log_eval_steps = 1 + + mlp_dim = 2 + num_heads = 1 + num_layers = 1 + hidden_size = 1 +# debug on v3-8: 1 epoch/16 samples/small vit +elif DEBUG == 2: + batch_size=8 + number_train_examples_debug = 16 + number_eval_examples_debug = 16 + num_training_epochs = 1 # ml_collections.FieldReference(100) + log_eval_steps = 1 + + mlp_dim = 2 + num_heads = 1 + num_layers = 1 + hidden_size = 1 +# debug on v3-8: 1 epoch/16 samples/regular vit +elif DEBUG == 3: + batch_size=8 + number_train_examples_debug = 16 + number_eval_examples_debug = 16 + num_training_epochs = 1 # ml_collections.FieldReference(100) + log_eval_steps = 1 + + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 +elif DEBUG == 4: + target_size =(128, 128) + STRIDE=16 + batch_size=8 + number_train_examples_debug = 16 + number_eval_examples_debug = 16 + num_training_epochs = 1 # ml_collections.FieldReference(100) + log_eval_steps = 1 + + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 + + +def get_config(): + """Config for training a patch-transformer on JFT.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub' + + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + # flags to debug scenic on mac + config.dataset_configs.number_train_examples_debug = number_train_examples_debug + config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + config.num_classes = 19 + + config.patches = 
ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.classifier = 'gap' + + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + config.backbone_configs.hidden_size = hidden_size + + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # init + #config.init_from = "gs://ub-data/ImageNet21k_ViT-B16_ImagetNet21k_ViT-B_16_28592399.npz" + + # learning rate + #steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant' #* cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = log_eval_steps #200 + + # extra + config.args = {} + + return config + + +def get_sweep(hyper): + return hyper.product([]) From 5de076cbf083c3e68156ba0536785908a3012f06 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 17 Nov 2021 11:14:09 -0500 Subject: [PATCH 011/150] include flag to check for pretrained backbone --- .../cityscapes/custom_pretrain_utils.py | 100 ++++++++++++++++++ .../cityscapes/custom_segmentation_trainer.py | 21 +++- .../imagenet21k_segmenter_cityscapes11.py | 16 ++- 3 files changed, 131 insertions(+), 6 deletions(-) create mode 100644 experimental/cityscapes/custom_pretrain_utils.py diff --git a/experimental/cityscapes/custom_pretrain_utils.py b/experimental/cityscapes/custom_pretrain_utils.py new file mode 100644 index 000000000..c0e2db071 --- /dev/null +++ b/experimental/cityscapes/custom_pretrain_utils.py @@ -0,0 +1,100 @@ +# Copyright 2021 The Scenic Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utility functions for using pretrained models. + +Edited from scenic/train_lib/pretrain_utils.py +""" + +import collections +import os +import re +from typing import Any, Dict, Mapping, List, Optional, Union, Tuple + +from absl import logging +import flax +from flax.training import checkpoints +import jax +import numpy as np + +from scenic.train_lib import train_utils, pretrain_utils +from tensorflow.io import gfile + +# JAX team is working on type annotation for pytree: +# https://github.com/google/jax/issues/1555 +PyTree = Union[Mapping[str, Mapping], Any] + + +def convert_ub_to_scenic_checkpoint( + checkpoint_path: str, + train_state: Optional[train_utils.TrainState] = None, + convert_to_linen: bool = True) -> train_utils.TrainState: + """Converts a BigVision checkpoint to a scenic train state. + + The model weights, global step and accumulated train time are extracted. + Optimizer state, such as the momentum, is not extracted. + + Args: + checkpoint_path: Path to BigVision checkpoint. + train_state: A Scenic TrainState object. + convert_to_linen: Whether to convert to Linen format. + + Returns: + restored_train_state: Scenic train state with model weights, global step + and accumulated training time. 
+ """ + + def unflatten_dict(flattened: Dict[str, Any], + separator: str = '/', + leaf_idx: int = -1) -> Dict[str, Any]: + unflattened = {} + for k, v in flattened.items(): + subtree = unflattened + if leaf_idx != 0: + path = k.split(separator)[:leaf_idx] + else: + path = k.split(separator) + for k2 in path[:-1]: + if k2 not in subtree: + subtree[k2] = {} + subtree = subtree[k2] + subtree[path[-1]] = v + return unflattened + + logging.info('Loading bigvision checkpoint from %s', checkpoint_path) + checkpoint_data = np.load(gfile.GFile(checkpoint_path, 'rb')) + tree = unflatten_dict(checkpoint_data, separator='/', leaf_idx=0) + + import pdb; pdb.set_trace() + restored_params = tree['opt']['target'] + if convert_to_linen: + restored_params = checkpoints.convert_pre_linen(restored_params) + restored_params = dict(restored_params) + if train_state: + restored_params = pretrain_utils.inspect_params( + expected_params=train_state.optimizer.target, + restored_params=restored_params, + fail_if_extra=False, + fail_if_missing=False, + fail_if_shapes_mismatch=False) + else: + train_state = train_utils.TrainState() + # pytype: disable=wrong-arg-types + restored_train_state = train_state.replace( # pytype: disable=attribute-error + global_step=int(tree['opt']['state']['step']), + optimizer={'target': restored_params}, + accum_train_time=int(tree['extra']['accum_train_time'])) + # pytype: enable=wrong-arg-types + + return restored_train_state diff --git a/experimental/cityscapes/custom_segmentation_trainer.py b/experimental/cityscapes/custom_segmentation_trainer.py index 1ba0e4959..29ed0fbf7 100644 --- a/experimental/cityscapes/custom_segmentation_trainer.py +++ b/experimental/cityscapes/custom_segmentation_trainer.py @@ -35,6 +35,9 @@ from scenic.train_lib.segmentation_trainer import _draw_side_by_side, get_confusion_matrix +#import custom_pretrain_utils +from flax.training.checkpoints import restore_checkpoint as flax_restore_checkpoint + def train_step( *, flax_model: 
nn.Module, @@ -254,10 +257,20 @@ def train( workdir, train_state) #import pdb; pdb.set_trace() - # Load pretrained model - if (start_step == 0 # Which means "no" checkpoint is restored! - and config.get('init_from') is not None): - raise NotImplementedError("") + # Load pretrained backbone + if start_step == 0 and config.get('load_pretrained_backbone', False): + #raise NotImplementedError("") + bb_checkpoint_path = config.pretrained_backbone_configs.get( + 'checkpoint_path') + bb_train_state = flax_restore_checkpoint(bb_checkpoint_path, target=None) + + #init_checkpoint_path = config.init_from.get('checkpoint_path') + #checkpoint_format = config.init_from.get('checkpoint_format', 'ub') + + #if checkpoint_format == 'ub': + # restored_train_state = custom_pretrain_utils.convert_ub_to_scenic_checkpoint( + # init_checkpoint_path, train_state) + elif start_step == 0: logging.info('Training completely from scratch.' 'Not restoring from any checkpoint.') diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py index 9c007be3a..f02fe68c9 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py @@ -16,7 +16,11 @@ # pylint: disable=line-too-long r"""Segmenter + cityscapes. -init from checkpoints +include flag to init from checkpoints. 
+ +command to run locally: +python deterministic.py --output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs1" --num_cores=0 --use_gpu=False --tpu=None --config="experiments/imagenet21k_segmenter_cityscapes11.py" + """ # pylint: enable=line-too-long @@ -124,7 +128,15 @@ def get_config(): config.focal_loss_gamma = 0.0 # init - #config.init_from = "gs://ub-data/ImageNet21k_ViT-B16_ImagetNet21k_ViT-B_16_28592399.npz" + #config.init_from = ml_collections.ConfigDict() + #config.init_from.checkpoint_path = "gs://ub-data/ImageNet21k_ViT-B16_ImagetNet21k_ViT-B_16_28592399.npz" + #config.init_from.checkpoint_format = 'ub' + #config.init_from.restore_backbone_embedding = True + + # pretrained backbone + #config.load_pretrained_backbone = True + #config.pretrained_backbone_configs = ml_collections.ConfigDict() + #config.pretrained_backbone_configs.checkpoint_path = "gs://ub-data/ImageNet21k_ViT-B16_ImagetNet21k_ViT-B_16_28592399.npz" # learning rate #steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size From afa8fec2587ed54e15de5d0983a97bac81916f7f Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 17 Nov 2021 11:23:45 -0500 Subject: [PATCH 012/150] include config for 100 epochs --- .../imagenet21k_segmenter_cityscapes3.py | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py new file mode 100644 index 000000000..d00c3b4ef --- /dev/null +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py @@ -0,0 +1,114 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. + +""" +# pylint: enable=line-too-long + +import ml_collections +#import get_fewshot # local file import + +_CITYSCAPES_TRAIN_SIZE = 2975 +#STRIDE = 4 + +target_size =(128, 128) +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 + + +def get_config(): + """Config for training segmenter on """ + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub' + + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + + # config following scenic + config.num_classes = 19 + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. 
+ config.backbone_configs.classifier = 'gap' + + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + config.backbone_configs.hidden_size = hidden_size + + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = log_eval_steps #200 + + # extra + config.args = {} + + return config + + +def get_sweep(hyper): + return hyper.product([]) From 245de00ba519008fe2d46275c9fd47a953feeb8b Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 17 Nov 2021 16:27:56 -0500 Subject: [PATCH 013/150] (1) vit backbone 
classifier is fixed to 'gap' for segmenter model (2) update segmenter_test to test different classifiers --- uncertainty_baselines/models/segmenter.py | 2 +- uncertainty_baselines/models/segmenter_test.py | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/uncertainty_baselines/models/segmenter.py b/uncertainty_baselines/models/segmenter.py index a895fc7d8..45f7c7e7f 100644 --- a/uncertainty_baselines/models/segmenter.py +++ b/uncertainty_baselines/models/segmenter.py @@ -336,7 +336,7 @@ def __call__(self, x: jnp.ndarray, *, train: bool, debug: bool = False): hidden_size=self.backbone_configs.hidden_size, dropout_rate=self.backbone_configs.dropout_rate, attention_dropout_rate=self.backbone_configs.attention_dropout_rate, - classifier=self.backbone_configs.classifier, + classifier='gap', name='backbone')( x, train=train) else: diff --git a/uncertainty_baselines/models/segmenter_test.py b/uncertainty_baselines/models/segmenter_test.py index b489ee3b8..ca7646999 100644 --- a/uncertainty_baselines/models/segmenter_test.py +++ b/uncertainty_baselines/models/segmenter_test.py @@ -25,10 +25,13 @@ class SegVitTest(parameterized.TestCase): @parameterized.parameters( - (2, 16, 224, 224), + (2, 2, 1, 12, 1, 'gap'), + (2, 2, 1, 12, 1, 'token'), ) - def test_segmenter_transformer(self, num_classes, hidden_size, img_h, img_w): + def test_segmenter_transformer(self, num_classes, mlp_dim, num_heads, num_layers, hidden_size, classifier): # VisionTransformer. + img_h = 224 + img_w = 224 config = ml_collections.ConfigDict() config.num_classes = num_classes @@ -41,11 +44,10 @@ def test_segmenter_transformer(self, num_classes, hidden_size, img_h, img_w): config.backbone_configs.hidden_size = hidden_size config.backbone_configs.attention_dropout_rate = 0. config.backbone_configs.dropout_rate = 0. 
- config.backbone_configs.mlp_dim = 2 - config.backbone_configs.num_heads = 1 - config.backbone_configs.num_layers = 1 - # TODO(kellybuchanan): include 'token' test - config.backbone_configs.classifier = 'gap' + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + config.backbone_configs.classifier = classifier config.decoder_configs = ml_collections.ConfigDict() config.decoder_configs.type = 'linear' From 062172b432134d57b621c75d88f1d81a7cfdc5c0 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 29 Nov 2021 13:05:02 -0500 Subject: [PATCH 014/150] update model --- experimental/cityscapes/README.md | 1 + .../cityscapes/custom_pretrain_utils.py | 100 ------------------ 2 files changed, 1 insertion(+), 100 deletions(-) delete mode 100644 experimental/cityscapes/custom_pretrain_utils.py diff --git a/experimental/cityscapes/README.md b/experimental/cityscapes/README.md index 284ae1dd3..974649c81 100644 --- a/experimental/cityscapes/README.md +++ b/experimental/cityscapes/README.md @@ -3,3 +3,4 @@ [x] include loading cityscapes dataset [x] include deterministic training for segmenter +[ ] include init from pretrained backbone diff --git a/experimental/cityscapes/custom_pretrain_utils.py b/experimental/cityscapes/custom_pretrain_utils.py deleted file mode 100644 index c0e2db071..000000000 --- a/experimental/cityscapes/custom_pretrain_utils.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright 2021 The Scenic Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Utility functions for using pretrained models. - -Edited from scenic/train_lib/pretrain_utils.py -""" - -import collections -import os -import re -from typing import Any, Dict, Mapping, List, Optional, Union, Tuple - -from absl import logging -import flax -from flax.training import checkpoints -import jax -import numpy as np - -from scenic.train_lib import train_utils, pretrain_utils -from tensorflow.io import gfile - -# JAX team is working on type annotation for pytree: -# https://github.com/google/jax/issues/1555 -PyTree = Union[Mapping[str, Mapping], Any] - - -def convert_ub_to_scenic_checkpoint( - checkpoint_path: str, - train_state: Optional[train_utils.TrainState] = None, - convert_to_linen: bool = True) -> train_utils.TrainState: - """Converts a BigVision checkpoint to a scenic train state. - - The model weights, global step and accumulated train time are extracted. - Optimizer state, such as the momentum, is not extracted. - - Args: - checkpoint_path: Path to BigVision checkpoint. - train_state: A Scenic TrainState object. - convert_to_linen: Whether to convert to Linen format. - - Returns: - restored_train_state: Scenic train state with model weights, global step - and accumulated training time. 
- """ - - def unflatten_dict(flattened: Dict[str, Any], - separator: str = '/', - leaf_idx: int = -1) -> Dict[str, Any]: - unflattened = {} - for k, v in flattened.items(): - subtree = unflattened - if leaf_idx != 0: - path = k.split(separator)[:leaf_idx] - else: - path = k.split(separator) - for k2 in path[:-1]: - if k2 not in subtree: - subtree[k2] = {} - subtree = subtree[k2] - subtree[path[-1]] = v - return unflattened - - logging.info('Loading bigvision checkpoint from %s', checkpoint_path) - checkpoint_data = np.load(gfile.GFile(checkpoint_path, 'rb')) - tree = unflatten_dict(checkpoint_data, separator='/', leaf_idx=0) - - import pdb; pdb.set_trace() - restored_params = tree['opt']['target'] - if convert_to_linen: - restored_params = checkpoints.convert_pre_linen(restored_params) - restored_params = dict(restored_params) - if train_state: - restored_params = pretrain_utils.inspect_params( - expected_params=train_state.optimizer.target, - restored_params=restored_params, - fail_if_extra=False, - fail_if_missing=False, - fail_if_shapes_mismatch=False) - else: - train_state = train_utils.TrainState() - # pytype: disable=wrong-arg-types - restored_train_state = train_state.replace( # pytype: disable=attribute-error - global_step=int(tree['opt']['state']['step']), - optimizer={'target': restored_params}, - accum_train_time=int(tree['extra']['accum_train_time'])) - # pytype: enable=wrong-arg-types - - return restored_train_state From 7c02d78a4ab0596351a805de78d3261e3e1921ce Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 29 Nov 2021 13:13:02 -0500 Subject: [PATCH 015/150] add code to preload weights -- which can fail if frozen dictionary is used --- experimental/cityscapes/custom_models.py | 92 +++++++++++++++++++ .../cityscapes/custom_segmentation_trainer.py | 47 +++++++--- .../imagenet21k_segmenter_cityscapes.py | 2 +- .../imagenet21k_segmenter_cityscapes11.py | 53 ++++------- uncertainty_baselines/models/segmenter.py | 2 +- .../models/segmenter_test.py 
| 1 - 6 files changed, 146 insertions(+), 51 deletions(-) diff --git a/experimental/cityscapes/custom_models.py b/experimental/cityscapes/custom_models.py index 323d47cd6..8f4479c07 100644 --- a/experimental/cityscapes/custom_models.py +++ b/experimental/cityscapes/custom_models.py @@ -1,10 +1,14 @@ """ Custom models which allow for model inheritance """ +from typing import Any, Callable, Dict, Iterable, Mapping, Optional, Tuple, List +from absl import logging from scenic.model_lib.base_models.segmentation_model import SegmentationModel from uncertainty_baselines.models.segmenter import SegVit import ml_collections +import numpy as np +import scipy class SegmenterSegmentationModel(SegmentationModel): """Segmenter model for segmentation task.""" @@ -18,3 +22,91 @@ def build_flax_model(self): def default_flax_model_config(self): raise NotImplementedError() + + def init_backbone_from_train_state( + self, train_state: Any, restored_train_state: Any, + restored_model_cfg: ml_collections.ConfigDict) -> Any: + """ + Edited from scenic. + Updates the train_state with data from restored_train_state. + Here, we do some surgery and replace parts of the parameters/model_state + in the train_state with some parameters/model_state from the + pretrained_train_state. + Note that the grid shape of our model can be different from that of the + pretrained model (position embeddings are adapted by interpolation). + Args: + train_state: A raw TrainState for the model. + restored_train_state: A TrainState that is loaded with parameters/state of + a pretrained model. + restored_model_cfg: Configuration of the model from which the + restored_train_state come from. Usually used for some asserts. + Returns: + Updated train_state. 
+ """ + # Get grid sizes of target model: + gs_segvit = [ + self.config.dataset_configs.target_size[0] // + self.config.patches.size[0], + self.config.dataset_configs.target_size[1] // + self.config.patches.size[1] + ] + + # Get grid sizes of restored model: + if 'patches' in restored_model_cfg: + restored_patches_cfg = restored_model_cfg.patches + else: + restored_patches_cfg = restored_model_cfg.stem_configs.patches + if 'grid' in restored_patches_cfg: + gs_vit = restored_patches_cfg.grid + else: + raise NotImplementedError("") + #init_dset_meta = self.config.model.backbone.init_from.dataset_meta_data + #gs_vit = [ + # init_dset_meta['input_shape'][1] // restored_patches_cfg.size[0], + # init_dset_meta['input_shape'][2] // restored_patches_cfg.size[1], + #] + + #TODO(kellybuchanan): check issue where FrozenDict is immutable. + + #backbone = train_state.optimizer.target.params['backbone'] + #restored_param = restored_train_state.optimizer['target']['params'] + + backbone = train_state.optimizer.target['backbone'] + restored_param = restored_train_state.optimizer['target'] + for m_key, m_params in restored_param.items(): + # load parameters for embedding (CNN at stem) + if m_key in ['embedding']: + backbone[m_key] = m_params + + # load parameters for Transformer encoder + if m_key == 'Transformer': + for tm_key, tm_params in m_params.items(): + if tm_key == 'posembed_input': # might need resolution change + vit_posemb = m_params['posembed_input']['pos_embedding'] + segvit_posemb = backbone[m_key]['posembed_input']['pos_embedding'] + if vit_posemb.shape != segvit_posemb.shape: + # rescale the grid of pos, embeddings: param shape is (1,N,768) + segvit_ntok = segvit_posemb.shape[1] + if restored_model_cfg.classifier == 'token': + # the first token is the CLS token + vit_posemb = vit_posemb[0, 1:] + else: + vit_posemb = vit_posemb[0] + logging.info('Resized variant: %s to %s', vit_posemb.shape, + segvit_posemb.shape) + assert np.prod(gs_vit) == vit_posemb.shape[0] + 
assert np.prod(gs_segvit) == segvit_ntok + if gs_vit != gs_segvit: # we need resolution change + logging.info('Grid-size from %s to %s', gs_vit, gs_segvit) + vit_posemb_grid = vit_posemb.reshape(gs_vit + [-1]) + zoom = (gs_segvit[0] / gs_vit[0], gs_segvit[1] / gs_vit[1], 1) + vit_posemb_grid = scipy.ndimage.zoom( + vit_posemb_grid, zoom, order=1) + vit_posemb = vit_posemb_grid.reshape(1, np.prod(gs_segvit), -1) + else: # just the cls token was extra and we are now fine + vit_posemb = np.expand_dims(vit_posemb, axis=0) + backbone[m_key][tm_key]['pos_embedding'] = vit_posemb + else: # other parameters of the Transformer encoder + backbone[m_key][tm_key] = tm_params + + return train_state diff --git a/experimental/cityscapes/custom_segmentation_trainer.py b/experimental/cityscapes/custom_segmentation_trainer.py index 29ed0fbf7..6629cc0ef 100644 --- a/experimental/cityscapes/custom_segmentation_trainer.py +++ b/experimental/cityscapes/custom_segmentation_trainer.py @@ -34,9 +34,10 @@ LossFn = Callable[[jnp.ndarray, Batch, Optional[jnp.ndarray]], float] from scenic.train_lib.segmentation_trainer import _draw_side_by_side, get_confusion_matrix - -#import custom_pretrain_utils from flax.training.checkpoints import restore_checkpoint as flax_restore_checkpoint +from scenic.train_lib import pretrain_utils + +from pretrainer_utils import load_bb_config def train_step( *, @@ -256,20 +257,42 @@ def train( train_state, start_step = train_utils.restore_checkpoint( workdir, train_state) - #import pdb; pdb.set_trace() # Load pretrained backbone if start_step == 0 and config.get('load_pretrained_backbone', False): - #raise NotImplementedError("") - bb_checkpoint_path = config.pretrained_backbone_configs.get( - 'checkpoint_path') - bb_train_state = flax_restore_checkpoint(bb_checkpoint_path, target=None) - #init_checkpoint_path = config.init_from.get('checkpoint_path') - #checkpoint_format = config.init_from.get('checkpoint_format', 'ub') + #TODO(kellybuchanan): check out partial 
loader in + # https://github.com/google/uncertainty-baselines/commit/083b1dcc52bb1964f8917d15552ece8848d582ae# + + # Loader from scenic + # cannot restore using flax + # bb_checkpoint_path = config.pretrained_backbone_configs.get('checkpoint_path') + # bb_train_state = flax_restore_checkpoint(bb_checkpoint_path, target=None) + + bb_checkpoint_path = config.pretrained_backbone_configs.get('checkpoint_path') + checkpoint_format = config.pretrained_backbone_configs.get('checkpoint_format', 'ub') + bb_model_cfg_file = config.pretrained_backbone_configs.get('checkpoint_cfg') + + #TODO(kellybuchanan): read config file directly from bb_model_cfg_file + restored_model_cfg = load_bb_config(config) + + if checkpoint_format == 'ub': + # load params from checkpoint + bb_train_state = pretrain_utils.convert_bigvision_to_scenic_checkpoint( + checkpoint_path=bb_checkpoint_path) + # cannot control the params loaded + # model_prefix_path = ['backbone'] + # train_state = pretrain_utils.init_from_pretrain_state( + # train_state, bb_train_state, model_prefix_path=model_prefix_path) + # can control the params loaded + import pdb; pdb.set_trace() + train_state = model.init_backbone_from_train_state(train_state, + bb_train_state, + restored_model_cfg) + + - #if checkpoint_format == 'ub': - # restored_train_state = custom_pretrain_utils.convert_ub_to_scenic_checkpoint( - # init_checkpoint_path, train_state) + else: + raise NotImplementedError("") elif start_step == 0: logging.info('Training completely from scratch.' diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py index 66cc8742f..c736433bc 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py @@ -84,7 +84,7 @@ def get_config(): # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
config.lr_configs = ml_collections.ConfigDict() config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' + config.lr_configs.factors = 'constant'# * cosine_decay * linear_warmup' config.lr_configs.warmup_steps = 1 * steps_per_epoch config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch config.lr_configs.base_learning_rate = 1e-4 diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py index f02fe68c9..2ce5fa428 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py @@ -29,9 +29,10 @@ _CITYSCAPES_TRAIN_SIZE = 2975 DEBUG = 1 -STRIDE = 4 +STRIDE = 16 + +target_size = (128, 128) -target_size=(512, 512) # debug on mac if DEBUG == 1: batch_size = 1 @@ -44,39 +45,13 @@ num_heads = 1 num_layers = 1 hidden_size = 1 -# debug on v3-8: 1 epoch/16 samples/small vit -elif DEBUG == 2: - batch_size=8 - number_train_examples_debug = 16 - number_eval_examples_debug = 16 +elif DEBUG == 5: + batch_size = 1 + number_train_examples_debug = 10 + number_eval_examples_debug = 10 num_training_epochs = 1 # ml_collections.FieldReference(100) log_eval_steps = 1 - mlp_dim = 2 - num_heads = 1 - num_layers = 1 - hidden_size = 1 -# debug on v3-8: 1 epoch/16 samples/regular vit -elif DEBUG == 3: - batch_size=8 - number_train_examples_debug = 16 - number_eval_examples_debug = 16 - num_training_epochs = 1 # ml_collections.FieldReference(100) - log_eval_steps = 1 - - mlp_dim = 3072 - num_heads = 12 - num_layers = 12 - hidden_size = 768 -elif DEBUG == 4: - target_size =(128, 128) - STRIDE=16 - batch_size=8 - number_train_examples_debug = 16 - number_eval_examples_debug = 16 - num_training_epochs = 1 # ml_collections.FieldReference(100) - log_eval_steps = 1 - mlp_dim = 3072 num_heads = 12 num_layers 
= 12 @@ -89,6 +64,7 @@ def get_config(): config.experiment_name = 'cityscapes_segvit_ub' + #dataset config.dataset_name = 'cityscapes' config.dataset_configs = ml_collections.ConfigDict() config.dataset_configs.target_size = target_size @@ -106,7 +82,7 @@ def get_config(): config.backbone_configs.type = 'vit' config.backbone_configs.attention_dropout_rate = 0. config.backbone_configs.dropout_rate = 0. - config.backbone_configs.classifier = 'gap' + config.backbone_configs.classifier = 'token' config.backbone_configs.mlp_dim = mlp_dim config.backbone_configs.num_heads = num_heads @@ -134,9 +110,14 @@ def get_config(): #config.init_from.restore_backbone_embedding = True # pretrained backbone - #config.load_pretrained_backbone = True - #config.pretrained_backbone_configs = ml_collections.ConfigDict() - #config.pretrained_backbone_configs.checkpoint_path = "gs://ub-data/ImageNet21k_ViT-B16_ImagetNet21k_ViT-B_16_28592399.npz" + config.load_pretrained_backbone = True + config.pretrained_backbone_configs = ml_collections.ConfigDict() + config.pretrained_backbone_configs.checkpoint_path = "gs://ub-data/ImageNet21k_ViT-B16_ImagetNet21k_ViT-B_16_28592399.npz" + config.pretrained_backbone_configs.checkpoint_format = "ub" + config.pretrained_backbone_configs.checkpoint_path = + + # doesn't work? 
+ #config.pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" # learning rate #steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size diff --git a/uncertainty_baselines/models/segmenter.py b/uncertainty_baselines/models/segmenter.py index 45f7c7e7f..a895fc7d8 100644 --- a/uncertainty_baselines/models/segmenter.py +++ b/uncertainty_baselines/models/segmenter.py @@ -336,7 +336,7 @@ def __call__(self, x: jnp.ndarray, *, train: bool, debug: bool = False): hidden_size=self.backbone_configs.hidden_size, dropout_rate=self.backbone_configs.dropout_rate, attention_dropout_rate=self.backbone_configs.attention_dropout_rate, - classifier='gap', + classifier=self.backbone_configs.classifier, name='backbone')( x, train=train) else: diff --git a/uncertainty_baselines/models/segmenter_test.py b/uncertainty_baselines/models/segmenter_test.py index ca7646999..9af1287b2 100644 --- a/uncertainty_baselines/models/segmenter_test.py +++ b/uncertainty_baselines/models/segmenter_test.py @@ -26,7 +26,6 @@ class SegVitTest(parameterized.TestCase): @parameterized.parameters( (2, 2, 1, 12, 1, 'gap'), - (2, 2, 1, 12, 1, 'token'), ) def test_segmenter_transformer(self, num_classes, mlp_dim, num_heads, num_layers, hidden_size, classifier): # VisionTransformer. 
From 1dc1998ff8459b9ad363e0c9a8e3b0f59f04db84 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 29 Nov 2021 22:50:09 -0500 Subject: [PATCH 016/150] (1) load weights of network pretrained on imagenet --- experimental/cityscapes/call_deterministic.sh | 23 ++ experimental/cityscapes/custom_models.py | 240 +++++++++++++----- .../cityscapes/custom_segmentation_trainer.py | 50 ++-- .../experiments/cityscapes_segvit_config.py | 227 +++++++++++++++++ .../imagenet21k_segmenter_cityscapes11.py | 1 - .../imagenet21k_segmenter_cityscapes12.py | 171 +++++++++++++ .../imagenet21k_segmenter_cityscapes13.py | 151 +++++++++++ experimental/cityscapes/pretrainer_utils.py | 44 ++++ experimental/cityscapes/run_pretrained.sh | 31 +++ 9 files changed, 850 insertions(+), 88 deletions(-) create mode 100755 experimental/cityscapes/call_deterministic.sh create mode 100644 experimental/cityscapes/experiments/cityscapes_segvit_config.py create mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes12.py create mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes13.py create mode 100644 experimental/cityscapes/pretrainer_utils.py create mode 100755 experimental/cityscapes/run_pretrained.sh diff --git a/experimental/cityscapes/call_deterministic.sh b/experimental/cityscapes/call_deterministic.sh new file mode 100755 index 000000000..5a84bff6a --- /dev/null +++ b/experimental/cityscapes/call_deterministic.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +config='experiments/imagenet21k_segmenter_cityscapes.py' +use_gpu=False + +if [ "$(uname)" == "Darwin" ]; then + # Do something under Mac OS X platform + output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs" + num_cores=0 + tpu='None' +python deterministic.py -- --output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs" --num_cores=0 --use_gpu=False --tpu=False --config='experiments/imagenet21k_segmenter_cityscapes.py' + 
+elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then + # Do something under GNU/Linux platform + output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs" + tpu='local' + num_cores=8 + python3 deterministic.py -- --output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs" --num_cores=8 --tpu='local' --config='experiments/imagenet21k_segmenter_cityscapes.py' + +fi + +#python deterministic.py "--output_dir=$output_dir --num_cores=$num_cores --use_gpu=$use_gpu --tpu=$tpu --config=$config" + diff --git a/experimental/cityscapes/custom_models.py b/experimental/cityscapes/custom_models.py index 8f4479c07..47afeabb2 100644 --- a/experimental/cityscapes/custom_models.py +++ b/experimental/cityscapes/custom_models.py @@ -1,48 +1,64 @@ """ Custom models which allow for model inheritance """ -from typing import Any, Callable, Dict, Iterable, Mapping, Optional, Tuple, List -from absl import logging +import re +from typing import Any, Mapping, Optional, Tuple, List, Union -from scenic.model_lib.base_models.segmentation_model import SegmentationModel -from uncertainty_baselines.models.segmenter import SegVit +import flax import ml_collections import numpy as np import scipy +from absl import logging + +from scenic.model_lib.base_models.segmentation_model import SegmentationModel +from scenic.train_lib import train_utils +from uncertainty_baselines.models.segmenter import SegVit + +# JAX team is working on type annotation for pytree: +# https://github.com/google/jax/issues/1555 +PyTree = Union[Mapping[str, Mapping], Any] + class SegmenterSegmentationModel(SegmentationModel): """Segmenter model for segmentation task.""" def build_flax_model(self): return SegVit( - num_classes=self.dataset_meta_data['num_classes'], - patches=self.config.patches, - backbone_configs=self.config.backbone_configs, - decoder_configs=self.config.decoder_configs) + num_classes=self.dataset_meta_data['num_classes'], + patches=self.config.patches, + 
backbone_configs=self.config.backbone_configs, + decoder_configs=self.config.decoder_configs) def default_flax_model_config(self): raise NotImplementedError() def init_backbone_from_train_state( - self, train_state: Any, restored_train_state: Any, - restored_model_cfg: ml_collections.ConfigDict) -> Any: - """ - Edited from scenic. - Updates the train_state with data from restored_train_state. - Here, we do some surgery and replace parts of the parameters/model_state - in the train_state with some parameters/model_state from the - pretrained_train_state. - Note that the grid shape of our model can be different from that of the - pretrained model (position embeddings are adapted by interpolation). + self, + train_state: train_utils.TrainState, + restored_train_state: Mapping[str, Any], + restored_model_cfg: ml_collections.ConfigDict, + ckpt_prefix_path: Optional[List[str]] = None, + model_prefix_path: Optional[List[str]] = None, + name_mapping: Optional[Mapping[str, str]] = None, + skip_regex: Optional[str] = None) -> train_utils.TrainState: + """Updates the train_state with data from pretrain_state. + Args: train_state: A raw TrainState for the model. restored_train_state: A TrainState that is loaded with parameters/state of - a pretrained model. + a pretrained model. restored_model_cfg: Configuration of the model from which the - restored_train_state come from. Usually used for some asserts. + restored_train_state come from. Usually used for some asserts. + ckpt_prefix_path: Prefix to restored model parameters. + model_prefix_path: Prefix to the parameters to replace in the subtree model. + name_mapping: Mapping from parameter names of checkpoint to this model. + skip_regex: If there is a parameter whose parent keys match the regex, + the parameter will not be replaced from pretrain_state. + Returns: Updated train_state. 
""" + # --------------------------------- # Get grid sizes of target model: gs_segvit = [ self.config.dataset_configs.target_size[0] // @@ -60,53 +76,145 @@ def init_backbone_from_train_state( gs_vit = restored_patches_cfg.grid else: raise NotImplementedError("") - #init_dset_meta = self.config.model.backbone.init_from.dataset_meta_data - #gs_vit = [ + + # init_dset_meta = self.config.model.backbone.init_from.dataset_meta_data + # gs_vit = [ # init_dset_meta['input_shape'][1] // restored_patches_cfg.size[0], # init_dset_meta['input_shape'][2] // restored_patches_cfg.size[1], - #] - - #TODO(kellybuchanan): check issue where FrozenDict is immutable. - - #backbone = train_state.optimizer.target.params['backbone'] - #restored_param = restored_train_state.optimizer['target']['params'] - - backbone = train_state.optimizer.target['backbone'] - restored_param = restored_train_state.optimizer['target'] - for m_key, m_params in restored_param.items(): - # load parameters for embedding (CNN at stem) - if m_key in ['embedding']: - backbone[m_key] = m_params - - # load parameters for Transformer encoder - if m_key == 'Transformer': - for tm_key, tm_params in m_params.items(): - if tm_key == 'posembed_input': # might need resolution change - vit_posemb = m_params['posembed_input']['pos_embedding'] - segvit_posemb = backbone[m_key]['posembed_input']['pos_embedding'] - if vit_posemb.shape != segvit_posemb.shape: - # rescale the grid of pos, embeddings: param shape is (1,N,768) - segvit_ntok = segvit_posemb.shape[1] - if restored_model_cfg.classifier == 'token': - # the first token is the CLS token - vit_posemb = vit_posemb[0, 1:] - else: - vit_posemb = vit_posemb[0] - logging.info('Resized variant: %s to %s', vit_posemb.shape, - segvit_posemb.shape) - assert np.prod(gs_vit) == vit_posemb.shape[0] - assert np.prod(gs_segvit) == segvit_ntok - if gs_vit != gs_segvit: # we need resolution change - logging.info('Grid-size from %s to %s', gs_vit, gs_segvit) - vit_posemb_grid = 
vit_posemb.reshape(gs_vit + [-1]) - zoom = (gs_segvit[0] / gs_vit[0], gs_segvit[1] / gs_vit[1], 1) - vit_posemb_grid = scipy.ndimage.zoom( - vit_posemb_grid, zoom, order=1) - vit_posemb = vit_posemb_grid.reshape(1, np.prod(gs_segvit), -1) - else: # just the cls token was extra and we are now fine - vit_posemb = np.expand_dims(vit_posemb, axis=0) - backbone[m_key][tm_key]['pos_embedding'] = vit_posemb - else: # other parameters of the Transformer encoder - backbone[m_key][tm_key] = tm_params + # ] + + # --------------------------------- + name_mapping = name_mapping or {} + # converts pre-linen which doesn't apply here + # (restored_params, + # restored_model_state) = get_params_and_model_state_dict(restored_train_state) + restored_params = restored_train_state['optimizer']['target'] + restored_model_state = restored_train_state.get('model_state') + + model_params = train_state.optimizer.target + model_params = _replace_dict(model_params, + restored_params, + restored_model_cfg, + gs_vit, + gs_segvit, + ckpt_prefix_path, + model_prefix_path, + name_mapping, + skip_regex) + new_optimizer = train_state.optimizer.replace( + target=model_params) + train_state = train_state.replace( # pytype: disable=attribute-error + optimizer=new_optimizer) + if (restored_model_state is not None and + train_state.model_state is not None and + train_state.model_state): + if model_prefix_path: + # Insert model prefix after 'batch_stats'. + model_prefix_path = ['batch_stats'] + model_prefix_path + if 'batch_stats' in restored_model_state: + ckpt_prefix_path = ckpt_prefix_path or [] + ckpt_prefix_path = ['batch_stats'] + ckpt_prefix_path + elif 'batch_stats' not in restored_model_state: # Backward compatibility. 
+ model_prefix_path = ['batch_stats'] + if ckpt_prefix_path and ckpt_prefix_path[0] != 'batch_stats': + ckpt_prefix_path = ['batch_stats'] + ckpt_prefix_path + model_state = _replace_dict(train_state.model_state, + restored_model_state, + restored_model_cfg, + gs_vit, + gs_segvit, + ckpt_prefix_path, + model_prefix_path, + name_mapping, + skip_regex) + train_state = train_state.replace( # pytype: disable=attribute-error + model_state=model_state) return train_state + + +def _replace_dict(model: PyTree, + restored: PyTree, + restored_model_cfg: ml_collections, + gs_vit: Optional[Tuple] = None, + gs_segvit: Optional[Tuple] = None, + ckpt_prefix_path: Optional[List[str]] = None, + model_prefix_path: Optional[List[str]] = None, + name_mapping: Optional[Mapping[str, str]] = None, + skip_regex: Optional[str] = None) -> PyTree: + """Replaces values in model dictionary with restored ones from checkpoint. + + Include changes to facilitate loading of pretrained variables + from an encoder w a token classifier. + """ + + model = flax.core.unfreeze(model) # pytype: disable=wrong-arg-types + restored = flax.core.unfreeze(restored) # pytype: disable=wrong-arg-types + + if ckpt_prefix_path: + for p in ckpt_prefix_path: + restored = restored[p] + + if model_prefix_path: + for p in reversed(model_prefix_path): + restored = {p: restored} + + # Flatten nested parameters to a dict of str -> tensor. Keys are tuples + # from the path in the nested dictionary to the specific tensor. E.g., + # {'a1': {'b1': t1, 'b2': t2}, 'a2': t3} + # -> {('a1', 'b1'): t1, ('a1', 'b2'): t2, ('a2',): t3}. 
+ restored_flat = flax.traverse_util.flatten_dict( + dict(restored), keep_empty_nodes=True) + model_flat = flax.traverse_util.flatten_dict( + dict(model), keep_empty_nodes=True) + + for m_key, m_params in restored_flat.items(): + # pytype: disable=attribute-error + for name, to_replace in name_mapping.items(): + m_key = tuple(to_replace if k == name else k for k in m_key) + # pytype: enable=attribute-error + m_key_str = '/'.join(m_key) + if m_key not in model_flat: + logging.warning( + '%s in checkpoint doesn\'t exist in model. Skip.', m_key_str) + continue + if skip_regex and re.findall(skip_regex, m_key_str): + logging.info('Skip loading parameter %s.', m_key_str) + continue + logging.info('Loading %s from checkpoint into model', m_key_str) + + # fix if token + if 'posembed_input' in m_key: # might need resolution change + # the backbone should be pose segmenter + # vit_posemb = m_params['posembed_input']['pos_embedding'] + vit_posemb = m_params + # segvit_posemb = backbone[m_key]['posembed_input']['pos_embedding'] + segvit_posemb = model_flat[m_key] + + if vit_posemb.shape != segvit_posemb.shape: + # rescale the grid of pos, embeddings: param shape is (1,N,768) + segvit_ntok = segvit_posemb.shape[1] + if restored_model_cfg.classifier == 'token': + # the first token is the CLS token + vit_posemb = vit_posemb[0, 1:] + else: + vit_posemb = vit_posemb[0] + logging.info('Resized variant: %s to %s', vit_posemb.shape, + segvit_posemb.shape) + assert np.prod(gs_vit) == vit_posemb.shape[0] + assert np.prod(gs_segvit) == segvit_ntok + if gs_vit != gs_segvit: # we need resolution change + logging.info('Grid-size from %s to %s', gs_vit, gs_segvit) + vit_posemb_grid = vit_posemb.reshape(gs_vit + [-1]) + zoom = (gs_segvit[0] / gs_vit[0], gs_segvit[1] / gs_vit[1], 1) + vit_posemb_grid = scipy.ndimage.zoom(vit_posemb_grid, zoom, order=1) + vit_posemb = vit_posemb_grid.reshape(1, np.prod(gs_segvit), -1) + else: # just the cls token was extra and we are now fine + vit_posemb = 
np.expand_dims(vit_posemb, axis=0) + m_params = vit_posemb + + assert model_flat[m_key].shape == m_params.shape + model_flat[m_key] = m_params + + return flax.core.freeze(flax.traverse_util.unflatten_dict(model_flat)) + diff --git a/experimental/cityscapes/custom_segmentation_trainer.py b/experimental/cityscapes/custom_segmentation_trainer.py index 6629cc0ef..04d9ba684 100644 --- a/experimental/cityscapes/custom_segmentation_trainer.py +++ b/experimental/cityscapes/custom_segmentation_trainer.py @@ -39,6 +39,7 @@ from pretrainer_utils import load_bb_config + def train_step( *, flax_model: nn.Module, @@ -253,50 +254,57 @@ def train( rng=train_rng, accum_train_time=0) start_step = train_state.global_step - if config.checkpoint: - train_state, start_step = train_utils.restore_checkpoint( - workdir, train_state) # Load pretrained backbone if start_step == 0 and config.get('load_pretrained_backbone', False): - - #TODO(kellybuchanan): check out partial loader in + # TODO(kellybuchanan): check out partial loader in # https://github.com/google/uncertainty-baselines/commit/083b1dcc52bb1964f8917d15552ece8848d582ae# + bb_checkpoint_path = config.pretrained_backbone_configs.get('checkpoint_path') + checkpoint_format = config.pretrained_backbone_configs.get('checkpoint_format', 'ub') + # bb_model_cfg_file = config.pretrained_backbone_configs.get('checkpoint_cfg') + # Loader from scenic # cannot restore using flax - # bb_checkpoint_path = config.pretrained_backbone_configs.get('checkpoint_path') + # Mathias suggested to try flax_restore_checkpoint # bb_train_state = flax_restore_checkpoint(bb_checkpoint_path, target=None) + # but we get an error *** msgpack.exceptions.ExtraData: unpack(b) received extra data. 
- bb_checkpoint_path = config.pretrained_backbone_configs.get('checkpoint_path') - checkpoint_format = config.pretrained_backbone_configs.get('checkpoint_format', 'ub') - bb_model_cfg_file = config.pretrained_backbone_configs.get('checkpoint_cfg') - - #TODO(kellybuchanan): read config file directly from bb_model_cfg_file + # TODO(kellybuchanan): read config file directly from bb_model_cfg_file restored_model_cfg = load_bb_config(config) if checkpoint_format == 'ub': + # import pdb; pdb.set_trace() # load params from checkpoint bb_train_state = pretrain_utils.convert_bigvision_to_scenic_checkpoint( - checkpoint_path=bb_checkpoint_path) - # cannot control the params loaded - # model_prefix_path = ['backbone'] - # train_state = pretrain_utils.init_from_pretrain_state( - # train_state, bb_train_state, model_prefix_path=model_prefix_path) - # can control the params loaded - import pdb; pdb.set_trace() + checkpoint_path=bb_checkpoint_path, + convert_to_linen=False) + + # option 1: failed as variables are a frozen dictionary + # could be used with flax.core.unfreeze, flax.core.freeze train_state = model.init_backbone_from_train_state(train_state, bb_train_state, - restored_model_cfg) + restored_model_cfg, + model_prefix_path=['backbone']) + # option2: it fails for embeddings as this mode + # doesn't allow to specify loaded params . + # model_prefix_path = ['backbone'] + # train_state = pretrain_utils.init_from_pretrain_state( + # train_state, bb_train_state, model_prefix_path=model_prefix_path) else: raise NotImplementedError("") elif start_step == 0: - logging.info('Training completely from scratch.' - 'Not restoring from any checkpoint.') + logging.info('Not restoring from any pretrained_backbone.') + + if config.checkpoint: + train_state, start_step = train_utils.restore_checkpoint( + workdir, train_state) + else: + logging.info('Not restoring from any checkpoints.') # Replicate the optimzier, state, and rng. 
train_state = jax_utils.replicate(train_state) diff --git a/experimental/cityscapes/experiments/cityscapes_segvit_config.py b/experimental/cityscapes/experiments/cityscapes_segvit_config.py new file mode 100644 index 000000000..3fd2a2676 --- /dev/null +++ b/experimental/cityscapes/experiments/cityscapes_segvit_config.py @@ -0,0 +1,227 @@ +# pylint: disable=line-too-long +r"""Default configs for Cityscapes segmentation. + +Launch a TPU job: +gxm third_party/py/scenic/google/xm/launch_xm.py -- \ + --binary //third_party/py/scenic/projects/segvit:main \ + --config='third_party/py/scenic/projects/segvit/configs/cityscapes_segvit_config.py' \ + --platform=df_8x8 --xm_resource_alloc=group:brain/grand-vision-xm-df \ + --priority=115 \ + --exp_name=cityscapes_segvit \ + --notes "R50-ViT-B/16 1024x2048 sweep" + +Test run: xid/27318283 +Performance: ~78% mIoU (WID 9 https://flatboard.corp.google.com/plot/hu4ooWrx4t0) + +""" +# pylint: enable=line-too-long + +import ml_collections + +_CITYSCAPES_TRAIN_SIZE = 2975 + +# Model specs. 
+VIT_SIZE = 'B' +STRIDE = 16 +RESNET_SIZE = 50 +CLASSIFIER = 'token' + +# JFT pretrained models derived from: +# https://colab.corp.google.com/drive/1GNO2D-BhZGX8UARyZCQ8xfhlCea42yx9#scrollTo=UXdJdTS6rfsx +MODEL_PATHS = { + ('B', 32, 50, 'token'): + '/cns/tp-d/home/dune/task_adapt/xzhai/tmp/hybrid/17221856/5/checkpoint.npz', + ('B', 16, 50, 'token'): + '/cns/tp-d/home/dune/task_adapt/xzhai/tmp/hybrid/17221856/6/checkpoint.npz', + ('B', 32, None, 'token'): + '/cns/tp-d/home/brain-ber/adosovitskiy/17084881/1/checkpoint.npz', + ('B', 16, None, 'token'): + '/cns/vz-d/home/brain-ber/adosovitskiy/17402132/1/checkpoint.npz', + ('L', 32, 50, 'token'): + '/cns/tp-d/home/brain-ber/adosovitskiy/17215117/1/checkpoint.npz', + ('L', 16, 50, 'token'): + '/cns/tp-d/home/brain-ber/adosovitskiy/17193867/2/checkpoint.npz', + ('L', 32, None, 'token'): + '/cns/lu-d/home/brain-ber/adosovitskiy/17085772/1/checkpoint.npz', + ('L', 16, None, 'token'): + '/cns/tp-d/home/brain-ber/adosovitskiy/17192124/1/checkpoint.npz', +} + +MODEL_PATH = MODEL_PATHS[(VIT_SIZE, STRIDE, RESNET_SIZE, CLASSIFIER)] + + +def get_config(): + """Returns the configuration for Cityscapes segmentation.""" + config = ml_collections.ConfigDict() + config.experiment_name = 'cityscapes_segvit' + + # dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = (1024, 2048) + + # model + config.model_name = 'segmenter' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit_plus' + config.model.backbone.body = get_backbone_config(config) + + # decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = 
ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + num_training_epochs = ml_collections.FieldReference(100) + config.num_training_epochs = num_training_epochs + config.batch_size = 128 + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # init + config.init_from = ml_collections.ConfigDict() + config.init_from.codebase = 'bigvision' + config.init_from.checkpoint_path = MODEL_PATH + config.init_from.xm = None + config.init_from.model_prefix_path = ['backbone', 'resformer'] + + # logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = False # debug mode during training + config.debug_eval = False # debug mode during eval + config.log_eval_steps = 200 + return config + + +def get_backbone_config(config): + """Get ViT+-style ViT backbone configs.""" + body_config = ml_collections.ConfigDict() + body_config.type = 'resformer' + body_config.patches = None + if RESNET_SIZE == 50: + body_config.resnet = ml_collections.ConfigDict() + body_config.resnet.depth = (3, 4, 6, 3) + body_config.resnet.width = 1.0 + elif RESNET_SIZE is None: + body_config.patches = 
{'size': (STRIDE, STRIDE)} + else: + raise NotImplementedError('') + + if RESNET_SIZE and STRIDE == 16: + depth = body_config.resnet.depth + depth = depth[:-2] + (sum(depth[-2:]),) + body_config.resnet.depth = depth + + body_config.transformer = ml_collections.ConfigDict() + body_config.transformer.dropout_rate = 0.1 + + if VIT_SIZE == 'B': + body_config.transformer.mlp_dim = 3072 + body_config.transformer.num_heads = 12 + body_config.transformer.num_layers = 12 + body_config.hidden_size = 768 + elif VIT_SIZE == 'L': + body_config.transformer.mlp_dim = 4096 + body_config.transformer.num_heads = 16 + body_config.transformer.num_layers = 24 + body_config.hidden_size = 1024 + else: + raise NotImplementedError('') + + body_config.classifier = CLASSIFIER + body_config.representation_size = None + + body_config.grid_size = ( + config.dataset_configs.target_size[0] // STRIDE, + config.dataset_configs.target_size[1] // STRIDE, + ) + + return body_config + + +def model(hyper, vit_size, stride, resnet_size, classifier): + """Defines models for sweep.""" + overwrites = [] + if resnet_size == 50: + depth = (3, 4, 6, 3) + if stride == 16: + depth = depth[:-2] + (sum(depth[-2:]),) + overwrites.append( + hyper.sweep('config.model.backbone.body.resnet.depth', [depth])) + overwrites.append( + hyper.sweep('config.model.backbone.body.resnet.width', [1.0])) + overwrites.append(hyper.sweep('config.model.backbone.body.patches', [None])) + elif resnet_size is None: + overwrites.append( + hyper.sweep('config.model.backbone.body.patches', [{ + 'size': (stride, stride) + }])) + else: + raise NotImplementedError('') + + if vit_size == 'B': + overwrites.append( + hyper.sweep('config.model.backbone.body.transformer.mlp_dim', [3072])) + overwrites.append( + hyper.sweep('config.model.backbone.body.transformer.num_heads', [12])) + overwrites.append( + hyper.sweep('config.model.backbone.body.transformer.num_layers', [12])) + overwrites.append( + 
hyper.sweep('config.model.backbone.body.hidden_size', [768])) + elif vit_size == 'L': + overwrites.append( + hyper.sweep('config.model.backbone.body.transformer.mlp_dim', [4096])) + overwrites.append( + hyper.sweep('config.model.backbone.body.transformer.num_heads', [16])) + overwrites.append( + hyper.sweep('config.model.backbone.body.transformer.num_layers', [24])) + overwrites.append( + hyper.sweep('config.model.backbone.body.hidden_size', [1024])) + else: + raise NotImplementedError('') + + overwrites.append( + hyper.sweep('config.model.backbone.body.classifier', [classifier])) + overwrites.append( + hyper.sweep('config.init_from.checkpoint_path', + [MODEL_PATHS[(vit_size, stride, resnet_size, classifier)]])) + + return hyper.product(overwrites) + + +def get_hyper(hyper): + """Defines the hyper-parameters sweeps for doing grid search.""" + models = hyper.chainit([ + model(hyper, 'B', 16, RESNET_SIZE, CLASSIFIER), + ]) + + return hyper.product([models]) \ No newline at end of file diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py index 2ce5fa428..141d81c3f 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py @@ -114,7 +114,6 @@ def get_config(): config.pretrained_backbone_configs = ml_collections.ConfigDict() config.pretrained_backbone_configs.checkpoint_path = "gs://ub-data/ImageNet21k_ViT-B16_ImagetNet21k_ViT-B_16_28592399.npz" config.pretrained_backbone_configs.checkpoint_format = "ub" - config.pretrained_backbone_configs.checkpoint_path = # doesn't work? 
#config.pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes12.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes12.py new file mode 100644 index 000000000..dd8b56e34 --- /dev/null +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes12.py @@ -0,0 +1,171 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+ +command to run locally: +python deterministic.py --output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs1" --num_cores=0 --use_gpu=False --tpu=None --config="experiments/imagenet21k_segmenter_cityscapes12.py" + + +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 5 +STRIDE = 16 +target_size = (128, 128) + +LOAD_PRETRAINED_BACKBONE = True +PRETRAIN_BACKBONE_TYPE = 'base' + +# debug on mac +if DEBUG == 1: + batch_size = 1 + number_train_examples_debug = 10 + number_eval_examples_debug = 10 + num_training_epochs = 1 + log_eval_steps = 1 + + mlp_dim = 2 + num_heads = 1 + num_layers = 1 + hidden_size = 1 + +elif DEBUG == 5: + batch_size = 1 + number_train_examples_debug = 10 + number_eval_examples_debug = 10 + num_training_epochs = 1 # ml_collections.FieldReference(100) + log_eval_steps = 1 + + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 + + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + + # flags to debug scenic on mac + config.dataset_configs.number_train_examples_debug = number_train_examples_debug + config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + config.backbone_configs.hidden_size = hidden_size + #config.backbone_configs.patches + 
#config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + #steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes13.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes13.py new file mode 100644 index 000000000..8d5e3ff5e --- /dev/null +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes13.py @@ -0,0 +1,151 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 5 +STRIDE = 16 +target_size = (128, 128) + +LOAD_PRETRAINED_BACKBONE = True +PRETRAIN_BACKBONE_TYPE = 'base' + +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + + # flags to debug scenic on mac + config.dataset_configs.number_train_examples_debug = number_train_examples_debug + config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + config.backbone_configs.hidden_size = hidden_size + #config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. 
+ config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + #steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def 
get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/cityscapes/pretrainer_utils.py b/experimental/cityscapes/pretrainer_utils.py new file mode 100644 index 000000000..35899ce6a --- /dev/null +++ b/experimental/cityscapes/pretrainer_utils.py @@ -0,0 +1,44 @@ +""" +Include utils for pretraining + +""" +import importlib +import os +import ml_collections + +def load_relative_config(relative_fpath): + """ Reads config of model in ub. + + Args: + relative_fpath: path of config file relative to its location in ub. 
+ + """ + # loader = importlib.machinery.SourceFileLoader('get_config', os.path.abspath(relative_fpath)) + # config = loader.load_module() + # config_module_spec = importlib.util.spec_from_file_location('get_config', os.path.abspath("../../baselines/jft/experiments/imagenet21k_vit_base16.py")) + # config_module = importlib.util.module_from_spec(config_module_spec) + # config_module_spec.loader.exec_module(config_module) + # return config + raise NotImplementedError("") + + +def load_bb_config(config): + """ Temporary toy bb config. + + Args: + config: model config. + + Returns: + restored_model_cfg: mock model config + """ + restored_model_cfg = ml_collections.ConfigDict() + restored_model_cfg.patches = ml_collections.ConfigDict() + restored_model_cfg.patches.size = [16, 16] + restored_model_cfg.classifier = 'token' + # if config.pretrained_backbone_configs.type == 'base': + # restored_model_cfg.model.transformer.dropout_rate = 0.1 + + #TODO(kellybuchanan): calculate grid given config + restored_model_cfg.patches.grid = [224//16, 224//16] + + return restored_model_cfg diff --git a/experimental/cityscapes/run_pretrained.sh b/experimental/cityscapes/run_pretrained.sh new file mode 100755 index 000000000..2a70a043f --- /dev/null +++ b/experimental/cityscapes/run_pretrained.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +# train cityscapes using segmenter with pretrained backbone +# supports 2 options to + + +if [ "$(uname)" == "Darwin" ]; then + # Do something under Mac OS X platform + config_file='experiments/imagenet21k_segmenter_cityscapes12.py' + output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs" + num_cores=0 + tpu=False + use_gpu=False + python deterministic.py --output_dir=${output_dir} \ + --num_cores=$num_cores \ + --use_gpu=$use_gpu \ + --config=${config_file} \ + # --tpu=$tpu +elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then + config_file='exp eriments/imagenet21k_segmenter_cityscapes13.py' + 
output_dir="/home/ekellbuch/cityscapes/outputs/run13" + num_cores=8 + tpu='local' + use_gpu=False + python deterministic.py --output_dir=${output_dir} \ + --num_cores=$num_cores \ + --use_gpu=$use_gpu \ + --config=${config12} \ + --tpu=$tpu + +fi \ No newline at end of file From 6ed9fa7465bf8c3da24a7d4529c82c9e5bd8e545 Mon Sep 17 00:00:00 2001 From: ekellbuch Date: Tue, 30 Nov 2021 15:58:43 +0000 Subject: [PATCH 017/150] fix tou config to debug segmenter with pretrained backbone --- .../imagenet21k_segmenter_cityscapes13.py | 5 ++++- experimental/cityscapes/run_pretrained.sh | 16 ++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes13.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes13.py index 8d5e3ff5e..acdb5b085 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes13.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes13.py @@ -38,6 +38,9 @@ num_layers = 12 hidden_size = 768 +if DEBUG ==5: + number_train_examples_debug = 16 + def get_config(): """Config for cityscapes segmentation.""" config = ml_collections.ConfigDict() @@ -55,7 +58,7 @@ def get_config(): # config following scenic # model - config.model_name = 'segmenter_mini' + config.model_name = 'segmenter_pretrained_mini' config.model = ml_collections.ConfigDict() config.patches = ml_collections.ConfigDict() diff --git a/experimental/cityscapes/run_pretrained.sh b/experimental/cityscapes/run_pretrained.sh index 2a70a043f..049d5dc98 100755 --- a/experimental/cityscapes/run_pretrained.sh +++ b/experimental/cityscapes/run_pretrained.sh @@ -4,7 +4,7 @@ # supports 2 options to -if [ "$(uname)" == "Darwin" ]; then +if [ "$(uname)" = "Darwin" ] ; then # Do something under Mac OS X platform config_file='experiments/imagenet21k_segmenter_cityscapes12.py' output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs" 
@@ -16,16 +16,16 @@ if [ "$(uname)" == "Darwin" ]; then --use_gpu=$use_gpu \ --config=${config_file} \ # --tpu=$tpu -elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then - config_file='exp eriments/imagenet21k_segmenter_cityscapes13.py' - output_dir="/home/ekellbuch/cityscapes/outputs/run13" +elif [ "$(uname)" = "Linux" ]; then + echo "in Linux" + config_file='experiments/imagenet21k_segmenter_cityscapes13.py' + output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs13" num_cores=8 tpu='local' use_gpu=False - python deterministic.py --output_dir=${output_dir} \ + python3 deterministic.py --output_dir=${output_dir} \ --num_cores=$num_cores \ --use_gpu=$use_gpu \ - --config=${config12} \ + --config=${config_file} \ --tpu=$tpu - -fi \ No newline at end of file +fi From b7310327f39a722746a9d7ed50b18b278fad500b Mon Sep 17 00:00:00 2001 From: ekellbuch Date: Tue, 30 Nov 2021 17:05:18 +0000 Subject: [PATCH 018/150] update config files for pretrained weights run in vm --- .../imagenet21k_segmenter_cityscapes.py | 96 +++++++---- .../imagenet21k_segmenter_cityscapes_sngp.py | 154 ++++++++++++++++++ experimental/cityscapes/run_pretrained_vm.sh | 24 +++ 3 files changed, 246 insertions(+), 28 deletions(-) create mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp.py create mode 100755 experimental/cityscapes/run_pretrained_vm.sh diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py index c736433bc..180eb3f79 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py @@ -20,71 +20,88 @@ # pylint: enable=line-too-long import ml_collections -#import get_fewshot # local file import - _CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = True +DEBUG = 0 +STiRIDE = 16 +target_size = (128, 128) + +LOAD_PRETRAINED_BACKBONE = True 
+PRETRAIN_BACKBONE_TYPE = 'base' + +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 +if DEBUG ==5: + number_train_examples_debug = 16 def get_config(): - """Config for training a patch-transformer on JFT.""" + """Config for cityscapes segmentation.""" config = ml_collections.ConfigDict() - config.experiment_name = 'cityscapes_segvit_ub' + config.experiment_name = 'cityscapes_segvit_ub_init' + #dataset config.dataset_name = 'cityscapes' config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = (512, 512) + config.dataset_configs.target_size = target_size + # flags to debug scenic on mac - config.dataset_configs.number_train_examples_debug = 10 - config.dataset_configs.number_eval_examples_debug = 10 + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug # config following scenic - config.num_classes = 19 + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() config.patches = ml_collections.ConfigDict() - config.patches.size = [4, 4] + config.patches.size = (STRIDE, STRIDE) config.backbone_configs = ml_collections.ConfigDict() config.backbone_configs.type = 'vit' - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. 
config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + config.backbone_configs.hidden_size = hidden_size + #config.backbone_configs.patches + #config.backbone_configs.representation_size = None - if DEBUG: - config.backbone_configs.mlp_dim = 2 - config.backbone_configs.num_heads = 1 - config.backbone_configs.num_layers = 1 - config.backbone_configs.hidden_size = 1 - else: - config.backbone_configs.mlp_dim = 3072 - config.backbone_configs.num_heads = 12 - config.backbone_configs.num_layers = 12 - config.backbone_configs.hidden_size = 768 + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + #decoder config.decoder_configs = ml_collections.ConfigDict() config.decoder_configs.type = 'linear' # training + config.trainer_name = 'segvit_trainer' config.optimizer = 'adam' config.optimizer_configs = ml_collections.ConfigDict() config.l2_decay_factor = 0.0 config.max_grad_norm = 1.0 config.label_smoothing = None - num_training_epochs = 1 # ml_collections.FieldReference(100) config.num_training_epochs = num_training_epochs - config.batch_size = 1 + config.batch_size = batch_size config.rng_seed = 0 config.focal_loss_gamma = 0.0 # learning rate - #steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size - steps_per_epoch = 10 // config.batch_size + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
config.lr_configs = ml_collections.ConfigDict() config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant'# * cosine_decay * linear_warmup' + config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' config.lr_configs.warmup_steps = 1 * steps_per_epoch config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch config.lr_configs.base_learning_rate = 1e-4 @@ -93,6 +110,10 @@ def get_config(): config.model_dtype_str = 'float32' config.data_dtype_str = 'float32' + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + #logging config.write_summary = True # write TB and/or XM summary config.write_xm_measurements = True # write XM measurements @@ -102,7 +123,7 @@ def get_config(): config.debug_train = True # debug mode during training config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 #200 + config.log_eval_steps = log_eval_steps # 200 # extra config.args = {} @@ -110,5 +131,24 @@ def get_config(): return config +def get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + def get_sweep(hyper): return hyper.product([]) diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp.py new file mode 100644 index 000000000..d95d05e78 --- /dev/null +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp.py @@ -0,0 +1,154 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 +STiRIDE = 16 +target_size = (128, 128) + +LOAD_PRETRAINED_BACKBONE = True +PRETRAIN_BACKBONE_TYPE = 'gp' + +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 + +if DEBUG ==5: + number_train_examples_debug = 16 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + + # flags to debug scenic on mac + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + config.backbone_configs.hidden_size = hidden_size + #config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. 
+ config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def 
get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/cityscapes/run_pretrained_vm.sh b/experimental/cityscapes/run_pretrained_vm.sh new file mode 100755 index 000000000..4ec136be6 --- /dev/null +++ b/experimental/cityscapes/run_pretrained_vm.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# train cityscapes using segmenter with pretrained backbone +# supports 2 options to + +declare -A configfiles=( ["deterministic"]="experiments/imagenet21k_segmenter_cityscapes.py" ["sngp"]="experiments/imagenet21k_segmenter_cityscapes_sngp.py" ) + +num_cores=8 +tpu='local' +use_gpu=False + +for config_mode in "sngp" #"deterministic" #"sngp" +do +config_file="${configfiles[$config_mode]}" +#output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs13" +output_dir="gs://ub-ekb/segmenter/cityscapes/run0/$config_mode" +echo "${output_dir} ${config_file}" +python3 deterministic.py --output_dir=${output_dir} \ + --num_cores=$num_cores \ + --use_gpu=$use_gpu \ + 
--config=${config_file} \ + --tpu=$tpu + +done From b747e07f1e4730f174e8d36fa958b6ab6e9d4d9b Mon Sep 17 00:00:00 2001 From: ekellbuch Date: Tue, 30 Nov 2021 17:59:53 +0000 Subject: [PATCH 019/150] add config files to compare model trained from scratch, init using deterministic and sngp weights --- .../imagenet21k_segmenter_cityscapes.py | 5 +- .../imagenet21k_segmenter_cityscapes_sngp.py | 5 +- .../experiments/segmenter_cityscapes.py | 153 ++++++++++++++++++ experimental/cityscapes/run_pretrained_vm.sh | 5 +- 4 files changed, 159 insertions(+), 9 deletions(-) create mode 100644 experimental/cityscapes/experiments/segmenter_cityscapes.py diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py index 180eb3f79..710c16bef 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py @@ -22,9 +22,8 @@ import ml_collections _CITYSCAPES_TRAIN_SIZE = 2975 DEBUG = 0 -STiRIDE = 16 -target_size = (128, 128) +target_size = (128, 128) LOAD_PRETRAINED_BACKBONE = True PRETRAIN_BACKBONE_TYPE = 'base' @@ -101,7 +100,7 @@ def get_config(): # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
config.lr_configs = ml_collections.ConfigDict() config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' config.lr_configs.warmup_steps = 1 * steps_per_epoch config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch config.lr_configs.base_learning_rate = 1e-4 diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp.py index d95d05e78..b0c7cd649 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp.py @@ -22,9 +22,8 @@ import ml_collections _CITYSCAPES_TRAIN_SIZE = 2975 DEBUG = 0 -STiRIDE = 16 -target_size = (128, 128) +target_size = (128, 128) LOAD_PRETRAINED_BACKBONE = True PRETRAIN_BACKBONE_TYPE = 'gp' @@ -101,7 +100,7 @@ def get_config(): # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. config.lr_configs = ml_collections.ConfigDict() config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' config.lr_configs.warmup_steps = 1 * steps_per_epoch config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch config.lr_configs.base_learning_rate = 1e-4 diff --git a/experimental/cityscapes/experiments/segmenter_cityscapes.py b/experimental/cityscapes/experiments/segmenter_cityscapes.py new file mode 100644 index 000000000..66387976d --- /dev/null +++ b/experimental/cityscapes/experiments/segmenter_cityscapes.py @@ -0,0 +1,153 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. + +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +target_size = (128, 128) +LOAD_PRETRAINED_BACKBONE = False +PRETRAIN_BACKBONE_TYPE = 'base' + +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 + +if DEBUG ==5: + number_train_examples_debug = 16 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + + # flags to debug scenic on mac + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + 
config.backbone_configs.hidden_size = hidden_size + #config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/cityscapes/run_pretrained_vm.sh b/experimental/cityscapes/run_pretrained_vm.sh index 4ec136be6..fe03ba4a2 100755 --- a/experimental/cityscapes/run_pretrained_vm.sh +++ b/experimental/cityscapes/run_pretrained_vm.sh @@ -3,16 +3,15 @@ # train cityscapes using segmenter with pretrained backbone # supports 2 options to -declare -A configfiles=( ["deterministic"]="experiments/imagenet21k_segmenter_cityscapes.py" ["sngp"]="experiments/imagenet21k_segmenter_cityscapes_sngp.py" ) +declare -A configfiles=( ["deterministic"]="experiments/imagenet21k_segmenter_cityscapes.py" ["sngp"]="experiments/imagenet21k_segmenter_cityscapes_sngp.py" ["scratch"]="experiments/segmenter_cityscapes.py") num_cores=8 tpu='local' use_gpu=False -for config_mode in "sngp" #"deterministic" #"sngp" +for config_mode in "sngp" "deterministic" "scratch" do config_file="${configfiles[$config_mode]}" -#output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs13" output_dir="gs://ub-ekb/segmenter/cityscapes/run0/$config_mode" echo "${output_dir} ${config_file}" python3 deterministic.py --output_dir=${output_dir} \ From f7c935116ee328729a6f1c830b836340f5422d74 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 30 Nov 2021 15:29:16 -0500 Subject: [PATCH 020/150] add call to tensorboard to compare different runs in an experiment --- experimental/cityscapes/call_tensorboard.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100755 experimental/cityscapes/call_tensorboard.sh diff --git a/experimental/cityscapes/call_tensorboard.sh b/experimental/cityscapes/call_tensorboard.sh new file mode 100755 index 000000000..b9c57d647 --- /dev/null +++ b/experimental/cityscapes/call_tensorboard.sh @@ -0,0 +1,13 @@ 
+#!/bin/sh + +# map gsbucket to local +# ~/go/bin/gcsfuse --only-dir segmenter/cityscapes/run0 ub-ekb run0 + +# read local directory: +results_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/gsbucket_out/run0" +for d in results_dir ; do + echo "$d" +done + +tensorboard --logdir ${results_dir} --reload_multifile True + From bf0d783d8843209b17041abd2ab2014c617aa3d5 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Fri, 3 Dec 2021 16:37:08 -0500 Subject: [PATCH 021/150] add config files for tpu pods --- .../pod/imagenet21k_segmenter_cityscapes.py | 153 ++++++++++++++++++ .../imagenet21k_segmenter_cityscapes_sngp.py | 153 ++++++++++++++++++ .../experiments/pod/segmenter_cityscapes.py | 153 ++++++++++++++++++ 3 files changed, 459 insertions(+) create mode 100644 experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes.py create mode 100644 experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes_sngp.py create mode 100644 experimental/cityscapes/experiments/pod/segmenter_cityscapes.py diff --git a/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes.py b/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes.py new file mode 100644 index 000000000..57a65b1eb --- /dev/null +++ b/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes.py @@ -0,0 +1,153 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. + +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +target_size = (512, 512) +LOAD_PRETRAINED_BACKBONE = True +PRETRAIN_BACKBONE_TYPE = 'base' + +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 + +if DEBUG ==5: + number_train_examples_debug = 16 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + + # flags to debug scenic on mac + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + config.backbone_configs.hidden_size = hidden_size + #config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. 
+ config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def 
get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes_sngp.py b/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes_sngp.py new file mode 100644 index 000000000..53de0e270 --- /dev/null +++ b/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes_sngp.py @@ -0,0 +1,153 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. + +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +target_size = (512, 512) +LOAD_PRETRAINED_BACKBONE = True +PRETRAIN_BACKBONE_TYPE = 'gp' + +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 + +if DEBUG ==5: + number_train_examples_debug = 16 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + + # flags to debug scenic on mac + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + config.backbone_configs.hidden_size = hidden_size + #config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. 
+ config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def 
get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/cityscapes/experiments/pod/segmenter_cityscapes.py b/experimental/cityscapes/experiments/pod/segmenter_cityscapes.py new file mode 100644 index 000000000..3dc9a7c99 --- /dev/null +++ b/experimental/cityscapes/experiments/pod/segmenter_cityscapes.py @@ -0,0 +1,153 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. + +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +target_size = (512, 512) +LOAD_PRETRAINED_BACKBONE = False +PRETRAIN_BACKBONE_TYPE = 'base' + +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 + +if DEBUG ==5: + number_train_examples_debug = 16 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + + # flags to debug scenic on mac + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + config.backbone_configs.hidden_size = hidden_size + #config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. 
+ config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def 
get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) From 5fc05605f32a773cd34d937a2536666bf5dfcf7a Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Fri, 3 Dec 2021 17:17:07 -0500 Subject: [PATCH 022/150] update tpu pod config --- .../experiments/pod/imagenet21k_segmenter_cityscapes.py | 2 +- .../experiments/pod/imagenet21k_segmenter_cityscapes_sngp.py | 2 +- experimental/cityscapes/experiments/pod/segmenter_cityscapes.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes.py b/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes.py index 57a65b1eb..f5e7c2c20 100644 --- a/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes.py +++ b/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes.py @@ -28,7 +28,7 @@ PRETRAIN_BACKBONE_TYPE = 'base' STRIDE=16 -batch_size=8 +batch_size=32 num_training_epochs = 100 # 
ml_collections.FieldReference(100) log_eval_steps = 200 diff --git a/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes_sngp.py b/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes_sngp.py index 53de0e270..0eb9434d1 100644 --- a/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes_sngp.py +++ b/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes_sngp.py @@ -28,7 +28,7 @@ PRETRAIN_BACKBONE_TYPE = 'gp' STRIDE=16 -batch_size=8 +batch_size=32 num_training_epochs = 100 # ml_collections.FieldReference(100) log_eval_steps = 200 diff --git a/experimental/cityscapes/experiments/pod/segmenter_cityscapes.py b/experimental/cityscapes/experiments/pod/segmenter_cityscapes.py index 3dc9a7c99..6fa9d4c49 100644 --- a/experimental/cityscapes/experiments/pod/segmenter_cityscapes.py +++ b/experimental/cityscapes/experiments/pod/segmenter_cityscapes.py @@ -28,7 +28,7 @@ PRETRAIN_BACKBONE_TYPE = 'base' STRIDE=16 -batch_size=8 +batch_size=32 num_training_epochs = 100 # ml_collections.FieldReference(100) log_eval_steps = 200 From e143662671d2713f53356cce256e07cfde5255fc Mon Sep 17 00:00:00 2001 From: ekellbuch Date: Mon, 6 Dec 2021 20:25:42 +0000 Subject: [PATCH 023/150] add run file for model with 256x256 --- ...imagenet21k_segmenter_cityscapes_larger.py | 153 ++++++++++++++++++ .../cityscapes/run_pretrained_vm_larger.sh | 23 +++ 2 files changed, 176 insertions(+) create mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_larger.py create mode 100755 experimental/cityscapes/run_pretrained_vm_larger.sh diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_larger.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_larger.py new file mode 100644 index 000000000..ce2d94d96 --- /dev/null +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_larger.py @@ -0,0 +1,153 @@ +# coding=utf-8 +# Copyright 2021 The 
Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. + +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +target_size = (256, 256) +LOAD_PRETRAINED_BACKBONE = True +PRETRAIN_BACKBONE_TYPE = 'base' + +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 + +if DEBUG ==5: + number_train_examples_debug = 16 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + + # flags to debug scenic on mac + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + 
#config.backbone_configs.grid_size + config.backbone_configs.hidden_size = hidden_size + #config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/cityscapes/run_pretrained_vm_larger.sh b/experimental/cityscapes/run_pretrained_vm_larger.sh new file mode 100755 index 000000000..65d01c758 --- /dev/null +++ b/experimental/cityscapes/run_pretrained_vm_larger.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# train cityscapes using segmenter with pretrained backbone +# supports options to try + +declare -A configfiles=( ["deterministic"]="experiments/imagenet21k_segmenter_cityscapes_larger.py" ["sngp"]="experiments/imagenet21k_segmenter_cityscapes_sngp.py" ["scratch"]="experiments/segmenter_cityscapes.py") + +num_cores=8 +tpu='local' +use_gpu=False + +for config_mode in "deterministic" # "scratch" "sngp" +do +config_file="${configfiles[$config_mode]}" +output_dir="gs://ub-ekb/segmenter/cityscapes/run2/$config_mode" +echo "${output_dir} ${config_file}" +python3 deterministic.py --output_dir=${output_dir} \ + --num_cores=$num_cores \ + --use_gpu=$use_gpu \ + --config=${config_file} \ + --tpu=$tpu + +done From 62329a8522515b9db3edac6309cf683fbdcf65dd Mon Sep 17 00:00:00 2001 From: ekellbuch Date: Mon, 6 Dec 2021 20:28:20 +0000 Subject: [PATCH 024/150] add run file for model with 512x512 img --- ...magenet21k_segmenter_cityscapes_larger1.py | 153 ++++++++++++++++++ .../cityscapes/run_pretrained_vm_larger1.sh | 23 +++ 2 files changed, 176 insertions(+) create mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_larger1.py create mode 100755 experimental/cityscapes/run_pretrained_vm_larger1.sh diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_larger1.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_larger1.py new file mode 100644 index 000000000..57a65b1eb --- /dev/null +++ 
b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_larger1.py @@ -0,0 +1,153 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. + +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +target_size = (512, 512) +LOAD_PRETRAINED_BACKBONE = True +PRETRAIN_BACKBONE_TYPE = 'base' + +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 + +if DEBUG ==5: + number_train_examples_debug = 16 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + + # flags to debug scenic on mac + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = 
ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + config.backbone_configs.hidden_size = hidden_size + #config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/cityscapes/run_pretrained_vm_larger1.sh b/experimental/cityscapes/run_pretrained_vm_larger1.sh new file mode 100755 index 000000000..8ece87d6e --- /dev/null +++ b/experimental/cityscapes/run_pretrained_vm_larger1.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# train cityscapes using segmenter with pretrained backbone +# supports options to try + +declare -A configfiles=( ["deterministic"]="experiments/imagenet21k_segmenter_cityscapes_larger1.py" ["sngp"]="experiments/imagenet21k_segmenter_cityscapes_sngp.py" ["scratch"]="experiments/segmenter_cityscapes.py") + +num_cores=8 +tpu='local' +use_gpu=False + +for config_mode in "deterministic" # "scratch" "sngp" +do +config_file="${configfiles[$config_mode]}" +output_dir="gs://ub-ekb/segmenter/cityscapes/run3/$config_mode" +echo "${output_dir} ${config_file}" +python3 deterministic.py --output_dir=${output_dir} \ + --num_cores=$num_cores \ + --use_gpu=$use_gpu \ + --config=${config_file} \ + --tpu=$tpu + +done From 8e7761a59e8f8fffbaf3f4e6119366e4f344069c Mon Sep 17 00:00:00 2001 From: ekellbuch Date: Mon, 6 Dec 2021 22:44:24 +0000 Subject: [PATCH 025/150] update config files for 512 run' --- ...=> imagenet21k_segmenter_cityscapes512.py} | 0 ...magenet21k_segmenter_cityscapes_sngp512.py | 153 ++++++++++++++++++ .../experiments/segmenter_cityscapes512.py | 153 ++++++++++++++++++ ..._vm_larger1.sh => run_pretrained_vm512.sh} | 4 +- 4 files changed, 308 insertions(+), 2 deletions(-) rename experimental/cityscapes/experiments/{imagenet21k_segmenter_cityscapes_larger1.py => imagenet21k_segmenter_cityscapes512.py} (100%) create mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp512.py create mode 100644 
experimental/cityscapes/experiments/segmenter_cityscapes512.py rename experimental/cityscapes/{run_pretrained_vm_larger1.sh => run_pretrained_vm512.sh} (68%) diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_larger1.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes512.py similarity index 100% rename from experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_larger1.py rename to experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes512.py diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp512.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp512.py new file mode 100644 index 000000000..53de0e270 --- /dev/null +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp512.py @@ -0,0 +1,153 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +target_size = (512, 512) +LOAD_PRETRAINED_BACKBONE = True +PRETRAIN_BACKBONE_TYPE = 'gp' + +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 + +if DEBUG ==5: + number_train_examples_debug = 16 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + + # flags to debug scenic on mac + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + config.backbone_configs.hidden_size = hidden_size + #config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. 
+ config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def 
get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/cityscapes/experiments/segmenter_cityscapes512.py b/experimental/cityscapes/experiments/segmenter_cityscapes512.py new file mode 100644 index 000000000..3dc9a7c99 --- /dev/null +++ b/experimental/cityscapes/experiments/segmenter_cityscapes512.py @@ -0,0 +1,153 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. + +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +target_size = (512, 512) +LOAD_PRETRAINED_BACKBONE = False +PRETRAIN_BACKBONE_TYPE = 'base' + +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 + +if DEBUG ==5: + number_train_examples_debug = 16 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + + # flags to debug scenic on mac + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + config.backbone_configs.hidden_size = hidden_size + #config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. 
+ config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def 
get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/cityscapes/run_pretrained_vm_larger1.sh b/experimental/cityscapes/run_pretrained_vm512.sh similarity index 68% rename from experimental/cityscapes/run_pretrained_vm_larger1.sh rename to experimental/cityscapes/run_pretrained_vm512.sh index 8ece87d6e..4fac4a271 100755 --- a/experimental/cityscapes/run_pretrained_vm_larger1.sh +++ b/experimental/cityscapes/run_pretrained_vm512.sh @@ -3,13 +3,13 @@ # train cityscapes using segmenter with pretrained backbone # supports options to try -declare -A configfiles=( ["deterministic"]="experiments/imagenet21k_segmenter_cityscapes_larger1.py" ["sngp"]="experiments/imagenet21k_segmenter_cityscapes_sngp.py" ["scratch"]="experiments/segmenter_cityscapes.py") +declare -A configfiles=( ["deterministic"]="experiments/imagenet21k_segmenter_cityscapes512.py" ["sngp"]="experiments/imagenet21k_segmenter_cityscapes_sngp512.py" 
["scratch"]="experiments/segmenter_cityscapes512.py") num_cores=8 tpu='local' use_gpu=False -for config_mode in "deterministic" # "scratch" "sngp" +for config_mode in "sngp" "scratch" #"deterministic" # "scratch" "sngp" do config_file="${configfiles[$config_mode]}" output_dir="gs://ub-ekb/segmenter/cityscapes/run3/$config_mode" From 66a771226d175ce5a2335c03fff92a4a947a4431 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 6 Dec 2021 22:08:07 -0500 Subject: [PATCH 026/150] add script to train model w different splits -- compatible w scenic/city_split --- experimental/cityscapes/README.md | 3 +- experimental/cityscapes/call_deterministic.sh | 24 +-- .../imagenet21k_segmenter_cityscapes1.py | 4 +- .../imagenet21k_segmenter_cityscapes11.py | 155 ---------------- .../imagenet21k_segmenter_cityscapes2.py | 115 ------------ ..._segmenter_cityscapes_deterministic_25.py} | 26 ++- ..._segmenter_cityscapes_deterministic_50.py} | 41 +++-- ..._segmenter_cityscapes_deterministic_75.py} | 80 ++++---- ...imagenet21k_segmenter_cityscapes_gp_25.py} | 83 +++++++-- .../imagenet21k_segmenter_cityscapes_gp_50.py | 173 ++++++++++++++++++ .../imagenet21k_segmenter_cityscapes_gp_75.py | 173 ++++++++++++++++++ .../cityscapes/run_deterministic_splits_vm.sh | 38 ++++ 12 files changed, 567 insertions(+), 348 deletions(-) delete mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py delete mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes2.py rename experimental/cityscapes/experiments/{imagenet21k_segmenter_cityscapes_larger.py => splits/imagenet21k_segmenter_cityscapes_deterministic_25.py} (87%) rename experimental/cityscapes/experiments/{imagenet21k_segmenter_cityscapes13.py => splits/imagenet21k_segmenter_cityscapes_deterministic_50.py} (81%) rename experimental/cityscapes/experiments/{imagenet21k_segmenter_cityscapes12.py => splits/imagenet21k_segmenter_cityscapes_deterministic_75.py} (76%) rename 
experimental/cityscapes/experiments/{imagenet21k_segmenter_cityscapes3.py => splits/imagenet21k_segmenter_cityscapes_gp_25.py} (53%) create mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_50.py create mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_75.py create mode 100755 experimental/cityscapes/run_deterministic_splits_vm.sh diff --git a/experimental/cityscapes/README.md b/experimental/cityscapes/README.md index 974649c81..2dcb695c5 100644 --- a/experimental/cityscapes/README.md +++ b/experimental/cityscapes/README.md @@ -3,4 +3,5 @@ [x] include loading cityscapes dataset [x] include deterministic training for segmenter -[ ] include init from pretrained backbone +[x] include init from pretrained backbone +[ ] include loading train split \ No newline at end of file diff --git a/experimental/cityscapes/call_deterministic.sh b/experimental/cityscapes/call_deterministic.sh index 5a84bff6a..23eb6dbac 100755 --- a/experimental/cityscapes/call_deterministic.sh +++ b/experimental/cityscapes/call_deterministic.sh @@ -1,22 +1,24 @@ -#!/bin/sh - -config='experiments/imagenet21k_segmenter_cityscapes.py' -use_gpu=False +#!/bin/bash +echo if [ "$(uname)" == "Darwin" ]; then + echo "Debug On mac" # Do something under Mac OS X platform output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs" + config_file="experiments/splits/imagenet21k_segmenter_cityscapes.py" num_cores=0 tpu='None' -python deterministic.py -- --output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs" --num_cores=0 --use_gpu=False --tpu=False --config='experiments/imagenet21k_segmenter_cityscapes.py' + use_gpu=False + rng_seed=2 + python3 deterministic.py --output_dir=${output_dir} \ + --num_cores=$num_cores \ + --use_gpu=$use_gpu \ + --config=${config_file} \ + --config.rng_seed=${rng_seed} \ -elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then 
- # Do something under GNU/Linux platform - output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs" - tpu='local' - num_cores=8 - python3 deterministic.py -- --output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs" --num_cores=8 --tpu='local' --config='experiments/imagenet21k_segmenter_cityscapes.py' +elif [ "$(uname)" == "Linux" ]; then + echo "run run_pretrained_vm.sh instead" fi #python deterministic.py "--output_dir=$output_dir --num_cores=$num_cores --use_gpu=$use_gpu --tpu=$tpu --config=$config" diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py index 49399b459..27f6b5a17 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py @@ -26,7 +26,8 @@ DEBUG = 1 STRIDE = 4 -target_size=(512, 512) +target_size=(128, 128) + # debug on mac if DEBUG == 1: batch_size = 1 @@ -87,6 +88,7 @@ def get_config(): config.dataset_name = 'cityscapes' config.dataset_configs = ml_collections.ConfigDict() config.dataset_configs.target_size = target_size + config.dataset_configs.train_split='train' # flags to debug scenic on mac config.dataset_configs.number_train_examples_debug = number_train_examples_debug config.dataset_configs.number_eval_examples_debug = number_train_examples_debug diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py deleted file mode 100644 index 141d81c3f..000000000 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes11.py +++ /dev/null @@ -1,155 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. - -include flag to init from checkpoints. - -command to run locally: -python deterministic.py --output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs1" --num_cores=0 --use_gpu=False --tpu=None --config="experiments/imagenet21k_segmenter_cityscapes11.py" - -""" -# pylint: enable=line-too-long - -import ml_collections -#import get_fewshot # local file import - -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 1 -STRIDE = 16 - -target_size = (128, 128) - -# debug on mac -if DEBUG == 1: - batch_size = 1 - number_train_examples_debug = 10 - number_eval_examples_debug = 10 - num_training_epochs = 1 # ml_collections.FieldReference(100) - log_eval_steps = 1 - - mlp_dim = 2 - num_heads = 1 - num_layers = 1 - hidden_size = 1 -elif DEBUG == 5: - batch_size = 1 - number_train_examples_debug = 10 - number_eval_examples_debug = 10 - num_training_epochs = 1 # ml_collections.FieldReference(100) - log_eval_steps = 1 - - mlp_dim = 3072 - num_heads = 12 - num_layers = 12 - hidden_size = 768 - - -def get_config(): - """Config for training a patch-transformer on JFT.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - # flags to debug scenic on mac - config.dataset_configs.number_train_examples_debug = number_train_examples_debug - config.dataset_configs.number_eval_examples_debug = 
number_train_examples_debug - - # config following scenic - config.num_classes = 19 - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.classifier = 'token' - - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - config.backbone_configs.hidden_size = hidden_size - - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # init - #config.init_from = ml_collections.ConfigDict() - #config.init_from.checkpoint_path = "gs://ub-data/ImageNet21k_ViT-B16_ImagetNet21k_ViT-B_16_28592399.npz" - #config.init_from.checkpoint_format = 'ub' - #config.init_from.restore_backbone_embedding = True - - # pretrained backbone - config.load_pretrained_backbone = True - config.pretrained_backbone_configs = ml_collections.ConfigDict() - config.pretrained_backbone_configs.checkpoint_path = "gs://ub-data/ImageNet21k_ViT-B16_ImagetNet21k_ViT-B_16_28592399.npz" - config.pretrained_backbone_configs.checkpoint_format = "ub" - - # doesn't work? 
- #config.pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - - # learning rate - #steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size - steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. - config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant' #* cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = log_eval_steps #200 - - # extra - config.args = {} - - return config - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes2.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes2.py deleted file mode 100644 index dd62f4b24..000000000 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes2.py +++ /dev/null @@ -1,115 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. - -train model on all data for 1 epoch -""" -# pylint: enable=line-too-long - -import ml_collections -#import get_fewshot # local file import - -_CITYSCAPES_TRAIN_SIZE = 2975 -#STRIDE = 4 - -target_size =(128, 128) -STRIDE=16 -batch_size=8 -num_training_epochs = 1 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - - -def get_config(): - """Config for training a patch-transformer on JFT.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub' - - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - - # config following scenic - config.num_classes = 19 - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. 
- config.backbone_configs.classifier = 'gap' - - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - config.backbone_configs.hidden_size = hidden_size - - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. - config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = log_eval_steps #200 - - # extra - config.args = {} - - return config - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_larger.py 
b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_25.py similarity index 87% rename from experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_larger.py rename to experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_25.py index ce2d94d96..f775673cd 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_larger.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_25.py @@ -23,7 +23,26 @@ _CITYSCAPES_TRAIN_SIZE = 2975 DEBUG = 0 -target_size = (256, 256) +TRAIN_PROP=25 + +# we will have 4 version of train split +if TRAIN_PROP == 100: + _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE + train_split = 'train' +elif TRAIN_PROP == 75: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 + train_split = 'train[:75%]' +elif TRAIN_PROP == 50: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 + train_split = 'train[:50%]' +elif TRAIN_PROP == 25: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 + train_split = 'train[:25%]' +elif TRAIN_PROP == 10: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 + train_split = 'train[:10%]' + +target_size = (512, 512) LOAD_PRETRAINED_BACKBONE = True PRETRAIN_BACKBONE_TYPE = 'base' @@ -50,6 +69,7 @@ def get_config(): config.dataset_name = 'cityscapes' config.dataset_configs = ml_collections.ConfigDict() config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = train_split # flags to debug scenic on mac #config.dataset_configs.number_train_examples_debug = number_train_examples_debug @@ -94,7 +114,7 @@ def get_config(): config.focal_loss_gamma = 0.0 # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size #steps_per_epoch = number_train_examples_debug // config.batch_size # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
@@ -122,7 +142,7 @@ def get_config(): config.debug_train = True # debug mode during training config.debug_eval = True # debug mode during eval - config.log_eval_steps = log_eval_steps # 200 + config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 # extra config.args = {} diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes13.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_50.py similarity index 81% rename from experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes13.py rename to experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_50.py index acdb5b085..f35771fad 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes13.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_50.py @@ -21,10 +21,28 @@ import ml_collections _CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 5 -STRIDE = 16 -target_size = (128, 128) - +DEBUG = 0 + +TRAIN_PROP=50 + +# we will have 4 version of train split +if TRAIN_PROP == 100: + _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE + train_split = 'train' +elif TRAIN_PROP == 75: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 + train_split = 'train[:75%]' +elif TRAIN_PROP == 50: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 + train_split = 'train[:50%]' +elif TRAIN_PROP == 25: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 + train_split = 'train[:25%]' +elif TRAIN_PROP == 10: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 + train_split = 'train[:10%]' + +target_size = (512, 512) LOAD_PRETRAINED_BACKBONE = True PRETRAIN_BACKBONE_TYPE = 'base' @@ -45,16 +63,17 @@ def get_config(): """Config for cityscapes segmentation.""" config = ml_collections.ConfigDict() - config.experiment_name = 'cityscapes_segvit_ub' + config.experiment_name = 'cityscapes_segvit_ub_init' #dataset config.dataset_name = 'cityscapes' config.dataset_configs = ml_collections.ConfigDict() 
config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = train_split # flags to debug scenic on mac - config.dataset_configs.number_train_examples_debug = number_train_examples_debug - config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug # config following scenic # model @@ -95,13 +114,13 @@ def get_config(): config.focal_loss_gamma = 0.0 # learning rate - #steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size - steps_per_epoch = number_train_examples_debug // config.batch_size + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. config.lr_configs = ml_collections.ConfigDict() config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' config.lr_configs.warmup_steps = 1 * steps_per_epoch config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch config.lr_configs.base_learning_rate = 1e-4 @@ -123,7 +142,7 @@ def get_config(): config.debug_train = True # debug mode during training config.debug_eval = True # debug mode during eval - config.log_eval_steps = log_eval_steps # 200 + config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 # extra config.args = {} diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes12.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_75.py similarity index 76% rename from experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes12.py rename to 
experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_75.py index dd8b56e34..592abc3e5 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes12.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_75.py @@ -16,66 +16,68 @@ # pylint: disable=line-too-long r"""Segmenter + cityscapes. -command to run locally: -python deterministic.py --output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs1" --num_cores=0 --use_gpu=False --tpu=None --config="experiments/imagenet21k_segmenter_cityscapes12.py" - - """ # pylint: enable=line-too-long import ml_collections _CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 5 -STRIDE = 16 -target_size = (128, 128) - +DEBUG = 0 + +TRAIN_PROP=75 + +# we will have 4 version of train split +if TRAIN_PROP == 100: + _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE + train_split = 'train' +elif TRAIN_PROP == 75: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 + train_split = 'train[:75%]' +elif TRAIN_PROP == 50: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 + train_split = 'train[:50%]' +elif TRAIN_PROP == 25: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 + train_split = 'train[:25%]' +elif TRAIN_PROP == 10: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 + train_split = 'train[:10%]' + +target_size = (512, 512) LOAD_PRETRAINED_BACKBONE = True PRETRAIN_BACKBONE_TYPE = 'base' -# debug on mac -if DEBUG == 1: - batch_size = 1 - number_train_examples_debug = 10 - number_eval_examples_debug = 10 - num_training_epochs = 1 - log_eval_steps = 1 - - mlp_dim = 2 - num_heads = 1 - num_layers = 1 - hidden_size = 1 - -elif DEBUG == 5: - batch_size = 1 - number_train_examples_debug = 10 - number_eval_examples_debug = 10 - num_training_epochs = 1 # ml_collections.FieldReference(100) - log_eval_steps = 1 +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 - mlp_dim = 3072 - num_heads = 12 - num_layers = 
12 - hidden_size = 768 +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 +if DEBUG ==5: + number_train_examples_debug = 16 def get_config(): """Config for cityscapes segmentation.""" config = ml_collections.ConfigDict() - config.experiment_name = 'cityscapes_segvit_ub' + config.experiment_name = 'cityscapes_segvit_ub_init' #dataset config.dataset_name = 'cityscapes' config.dataset_configs = ml_collections.ConfigDict() config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = train_split # flags to debug scenic on mac - config.dataset_configs.number_train_examples_debug = number_train_examples_debug - config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug # config following scenic # model - config.model_name = 'segmenter_mini' + config.model_name = 'segmenter_pretrained_mini' config.model = ml_collections.ConfigDict() config.patches = ml_collections.ConfigDict() @@ -112,13 +114,13 @@ def get_config(): config.focal_loss_gamma = 0.0 # learning rate - #steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size - steps_per_epoch = number_train_examples_debug // config.batch_size + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
config.lr_configs = ml_collections.ConfigDict() config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' config.lr_configs.warmup_steps = 1 * steps_per_epoch config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch config.lr_configs.base_learning_rate = 1e-4 @@ -140,7 +142,7 @@ def get_config(): config.debug_train = True # debug mode during training config.debug_eval = True # debug mode during eval - config.log_eval_steps = log_eval_steps # 200 + config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 # extra config.args = {} diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_25.py similarity index 53% rename from experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py rename to experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_25.py index d00c3b4ef..84eab1871 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_25.py @@ -20,12 +20,32 @@ # pylint: enable=line-too-long import ml_collections -#import get_fewshot # local file import - _CITYSCAPES_TRAIN_SIZE = 2975 -#STRIDE = 4 +DEBUG = 0 + +TRAIN_PROP=25 + +# we will have 4 version of train split +if TRAIN_PROP == 100: + _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE + train_split = 'train' +elif TRAIN_PROP == 75: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 + train_split = 'train[:75%]' +elif TRAIN_PROP == 50: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 + train_split = 'train[:50%]' +elif TRAIN_PROP == 25: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 + train_split = 'train[:25%]' +elif TRAIN_PROP == 10: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 + train_split = 'train[:10%]' + +target_size = 
(512, 512) +LOAD_PRETRAINED_BACKBONE = True +PRETRAIN_BACKBONE_TYPE = 'gp' -target_size =(128, 128) STRIDE=16 batch_size=8 num_training_epochs = 100 # ml_collections.FieldReference(100) @@ -36,38 +56,53 @@ num_layers = 12 hidden_size = 768 +if DEBUG ==5: + number_train_examples_debug = 16 def get_config(): - """Config for training segmenter on """ + """Config for cityscapes segmentation.""" config = ml_collections.ConfigDict() - config.experiment_name = 'cityscapes_segvit_ub' + config.experiment_name = 'cityscapes_segvit_ub_init' + #dataset config.dataset_name = 'cityscapes' config.dataset_configs = ml_collections.ConfigDict() config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = train_split + + # flags to debug scenic on mac + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug # config following scenic - config.num_classes = 19 + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() config.patches = ml_collections.ConfigDict() config.patches.size = (STRIDE, STRIDE) config.backbone_configs = ml_collections.ConfigDict() config.backbone_configs.type = 'vit' - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + config.backbone_configs.hidden_size = hidden_size + #config.backbone_configs.patches + #config.backbone_configs.representation_size = None + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. 
config.backbone_configs.mlp_dim = mlp_dim config.backbone_configs.num_heads = num_heads config.backbone_configs.num_layers = num_layers - config.backbone_configs.hidden_size = hidden_size + #decoder config.decoder_configs = ml_collections.ConfigDict() config.decoder_configs.type = 'linear' # training + config.trainer_name = 'segvit_trainer' config.optimizer = 'adam' config.optimizer_configs = ml_collections.ConfigDict() config.l2_decay_factor = 0.0 @@ -79,7 +114,8 @@ def get_config(): config.focal_loss_gamma = 0.0 # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. config.lr_configs = ml_collections.ConfigDict() @@ -93,6 +129,10 @@ def get_config(): config.model_dtype_str = 'float32' config.data_dtype_str = 'float32' + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + #logging config.write_summary = True # write TB and/or XM summary config.write_xm_measurements = True # write XM measurements @@ -102,7 +142,7 @@ def get_config(): config.debug_train = True # debug mode during training config.debug_eval = True # debug mode during eval - config.log_eval_steps = log_eval_steps #200 + config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 # extra config.args = {} @@ -110,5 +150,24 @@ def get_config(): return config +def get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = 
"gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + def get_sweep(hyper): return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_50.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_50.py new file mode 100644 index 000000000..70d200f42 --- /dev/null +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_50.py @@ -0,0 +1,173 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+
+"""
+# pylint: enable=line-too-long
+
+import ml_collections
+_CITYSCAPES_TRAIN_SIZE = 2975  # example count assigned to the full 'train' split below
+DEBUG = 0  # the only check in this file is `DEBUG ==5`
+
+TRAIN_PROP=50  # percentage of the train split this config selects
+
+# Map TRAIN_PROP (100/75/50/25/10) to a split string and its hard-coded example count; any other value leaves both names undefined.
+if TRAIN_PROP == 100:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE
+  train_split = 'train'
+elif TRAIN_PROP == 75:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231
+  train_split = 'train[:75%]'
+elif TRAIN_PROP == 50:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488
+  train_split = 'train[:50%]'
+elif TRAIN_PROP == 25:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 744
+  train_split = 'train[:25%]'
+elif TRAIN_PROP == 10:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 298
+  train_split = 'train[:10%]'
+
+target_size = (512, 512)
+LOAD_PRETRAINED_BACKBONE = True
+PRETRAIN_BACKBONE_TYPE = 'gp'  # 'base' or 'gp'; picks the checkpoint in get_pretrained_backbone_config()
+
+STRIDE=16  # used for both entries of config.patches.size
+batch_size=8
+num_training_epochs = 100 # ml_collections.FieldReference(100)
+log_eval_steps = 200  # not read by get_config(), which derives config.log_eval_steps from steps_per_epoch
+
+mlp_dim = 3072
+num_heads = 12
+num_layers = 12
+hidden_size = 768
+
+if DEBUG ==5:
+  number_train_examples_debug = 16  # referenced only by commented-out lines in get_config()
+
+def get_config():
+  """Build and return the ConfigDict for cityscapes segmentation."""
+  config = ml_collections.ConfigDict()
+
+  config.experiment_name = 'cityscapes_segvit_ub_init'
+
+  #dataset
+  config.dataset_name = 'cityscapes'
+  config.dataset_configs = ml_collections.ConfigDict()
+  config.dataset_configs.target_size = target_size
+  config.dataset_configs.train_split = train_split
+
+  # flags to debug scenic on mac
+  #config.dataset_configs.number_train_examples_debug = number_train_examples_debug
+  #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug
+
+  # config following scenic
+  # model
+  config.model_name = 'segmenter_pretrained_mini'
+  config.model = ml_collections.ConfigDict()
+
+  config.patches = ml_collections.ConfigDict()
+  config.patches.size = (STRIDE, STRIDE)
+
+  config.backbone_configs = ml_collections.ConfigDict()
+  config.backbone_configs.type = 'vit'
+  config.backbone_configs.classifier = 'gap'
+  #config.backbone_configs.grid_size
+  config.backbone_configs.hidden_size = hidden_size
+  #config.backbone_configs.patches
+  #config.backbone_configs.representation_size = None
+
+  config.backbone_configs.attention_dropout_rate = 0.
+  config.backbone_configs.dropout_rate = 0.
+  config.backbone_configs.mlp_dim = mlp_dim
+  config.backbone_configs.num_heads = num_heads
+  config.backbone_configs.num_layers = num_layers
+
+  #decoder
+  config.decoder_configs = ml_collections.ConfigDict()
+  config.decoder_configs.type = 'linear'
+
+  # training
+  config.trainer_name = 'segvit_trainer'
+  config.optimizer = 'adam'
+  config.optimizer_configs = ml_collections.ConfigDict()
+  config.l2_decay_factor = 0.0
+  config.max_grad_norm = 1.0
+  config.label_smoothing = None
+  config.num_training_epochs = num_training_epochs
+  config.batch_size = batch_size
+  config.rng_seed = 0
+  config.focal_loss_gamma = 0.0
+
+  # learning rate
+  steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size  # floor division: a final partial batch is not counted
+  #steps_per_epoch = number_train_examples_debug // config.batch_size
+
+  # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine.
+  config.lr_configs = ml_collections.ConfigDict()
+  config.lr_configs.learning_rate_schedule = 'compound'
+  config.lr_configs.factors = 'constant * cosine_decay * linear_warmup'
+  config.lr_configs.warmup_steps = 1 * steps_per_epoch  # one epoch's worth of steps
+  config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch
+  config.lr_configs.base_learning_rate = 1e-4
+
+  # model and data dtype
+  config.model_dtype_str = 'float32'
+  config.data_dtype_str = 'float32'
+
+  # load pretrained backbone
+  config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE
+  config.pretrained_backbone_configs = get_pretrained_backbone_config(config)
+
+  #logging
+  config.write_summary = True # write TB and/or XM summary
+  config.write_xm_measurements = True # write XM measurements
+  #config.xprof = False # Profile using xprof
+  config.checkpoint = True # do checkpointing
+  config.checkpoint_steps = 5 * steps_per_epoch  # five epochs' worth of steps
+
+  config.debug_train = True # debug mode during training
+  config.debug_eval = True # debug mode during eval
+  config.log_eval_steps = 1 * steps_per_epoch # one epoch's worth of steps (the module-level log_eval_steps = 200 is not used)
+
+  # extra
+  config.args = {}
+
+  return config
+
+
+def get_pretrained_backbone_config(config):  # returns None when config.load_pretrained_backbone is falsy, else a ConfigDict
+  if not config.load_pretrained_backbone:
+    return None
+  pretrained_backbone_configs = ml_collections.ConfigDict()
+  pretrained_backbone_configs.checkpoint_format = "ub"
+  pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE
+
+  if PRETRAIN_BACKBONE_TYPE == 'base':
+    pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz"
+    pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py"
+  elif PRETRAIN_BACKBONE_TYPE == 'gp':
+    pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz"
+    pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py"
+  else:
+    raise NotImplementedError("")  # PRETRAIN_BACKBONE_TYPE is neither 'base' nor 'gp'
+
+  return pretrained_backbone_configs
+
+
+def get_sweep(hyper):  # calls hyper.product with an empty list
+  return hyper.product([])
diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_75.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_75.py
new file mode 100644
index 000000000..c0d3ae11d
--- /dev/null
+++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_75.py
@@ -0,0 +1,173 @@
+# coding=utf-8
+# Copyright 2021 The Uncertainty Baselines Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# pylint: disable=line-too-long
+r"""Segmenter + cityscapes.
+
+"""
+# pylint: enable=line-too-long
+
+import ml_collections
+_CITYSCAPES_TRAIN_SIZE = 2975  # example count assigned to the full 'train' split below
+DEBUG = 0  # the only check in this file is `DEBUG ==5`
+
+TRAIN_PROP=75  # percentage of the train split this config selects
+
+# Map TRAIN_PROP (100/75/50/25/10) to a split string and its hard-coded example count; any other value leaves both names undefined.
+if TRAIN_PROP == 100:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE
+  train_split = 'train'
+elif TRAIN_PROP == 75:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231
+  train_split = 'train[:75%]'
+elif TRAIN_PROP == 50:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488
+  train_split = 'train[:50%]'
+elif TRAIN_PROP == 25:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 744
+  train_split = 'train[:25%]'
+elif TRAIN_PROP == 10:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 298
+  train_split = 'train[:10%]'
+
+target_size = (512, 512)
+LOAD_PRETRAINED_BACKBONE = True
+PRETRAIN_BACKBONE_TYPE = 'gp'  # 'base' or 'gp'; picks the checkpoint in get_pretrained_backbone_config()
+
+STRIDE=16  # used for both entries of config.patches.size
+batch_size=8
+num_training_epochs = 100 # ml_collections.FieldReference(100)
+log_eval_steps = 200  # not read by get_config(), which derives config.log_eval_steps from steps_per_epoch
+
+mlp_dim = 3072
+num_heads = 12
+num_layers = 12
+hidden_size = 768
+
+if DEBUG ==5:
+  number_train_examples_debug = 16  # referenced only by commented-out lines in get_config()
+
+def get_config():
+  """Build and return the ConfigDict for cityscapes segmentation."""
+  config = ml_collections.ConfigDict()
+
+  config.experiment_name = 'cityscapes_segvit_ub_init'
+
+  #dataset
+  config.dataset_name = 'cityscapes'
+  config.dataset_configs = ml_collections.ConfigDict()
+  config.dataset_configs.target_size = target_size
+  config.dataset_configs.train_split = train_split
+
+  # flags to debug scenic on mac
+  #config.dataset_configs.number_train_examples_debug = number_train_examples_debug
+  #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug
+
+  # config following scenic
+  # model
+  config.model_name = 'segmenter_pretrained_mini'
+  config.model = ml_collections.ConfigDict()
+
+  config.patches = ml_collections.ConfigDict()
+  config.patches.size = (STRIDE, STRIDE)
+
+  config.backbone_configs = ml_collections.ConfigDict()
+  config.backbone_configs.type = 'vit'
+  config.backbone_configs.classifier = 'gap'
+  #config.backbone_configs.grid_size
+  config.backbone_configs.hidden_size = hidden_size
+  #config.backbone_configs.patches
+  #config.backbone_configs.representation_size = None
+
+  config.backbone_configs.attention_dropout_rate = 0.
+  config.backbone_configs.dropout_rate = 0.
+  config.backbone_configs.mlp_dim = mlp_dim
+  config.backbone_configs.num_heads = num_heads
+  config.backbone_configs.num_layers = num_layers
+
+  #decoder
+  config.decoder_configs = ml_collections.ConfigDict()
+  config.decoder_configs.type = 'linear'
+
+  # training
+  config.trainer_name = 'segvit_trainer'
+  config.optimizer = 'adam'
+  config.optimizer_configs = ml_collections.ConfigDict()
+  config.l2_decay_factor = 0.0
+  config.max_grad_norm = 1.0
+  config.label_smoothing = None
+  config.num_training_epochs = num_training_epochs
+  config.batch_size = batch_size
+  config.rng_seed = 0
+  config.focal_loss_gamma = 0.0
+
+  # learning rate
+  steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size  # floor division: a final partial batch is not counted
+  #steps_per_epoch = number_train_examples_debug // config.batch_size
+
+  # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine.
+  config.lr_configs = ml_collections.ConfigDict()
+  config.lr_configs.learning_rate_schedule = 'compound'
+  config.lr_configs.factors = 'constant * cosine_decay * linear_warmup'
+  config.lr_configs.warmup_steps = 1 * steps_per_epoch  # one epoch's worth of steps
+  config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch
+  config.lr_configs.base_learning_rate = 1e-4
+
+  # model and data dtype
+  config.model_dtype_str = 'float32'
+  config.data_dtype_str = 'float32'
+
+  # load pretrained backbone
+  config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE
+  config.pretrained_backbone_configs = get_pretrained_backbone_config(config)
+
+  #logging
+  config.write_summary = True # write TB and/or XM summary
+  config.write_xm_measurements = True # write XM measurements
+  #config.xprof = False # Profile using xprof
+  config.checkpoint = True # do checkpointing
+  config.checkpoint_steps = 5 * steps_per_epoch  # five epochs' worth of steps
+
+  config.debug_train = True # debug mode during training
+  config.debug_eval = True # debug mode during eval
+  config.log_eval_steps = 1 * steps_per_epoch # one epoch's worth of steps (the module-level log_eval_steps = 200 is not used)
+
+  # extra
+  config.args = {}
+
+  return config
+
+
+def get_pretrained_backbone_config(config):  # returns None when config.load_pretrained_backbone is falsy, else a ConfigDict
+  if not config.load_pretrained_backbone:
+    return None
+  pretrained_backbone_configs = ml_collections.ConfigDict()
+  pretrained_backbone_configs.checkpoint_format = "ub"
+  pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE
+
+  if PRETRAIN_BACKBONE_TYPE == 'base':
+    pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz"
+    pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py"
+  elif PRETRAIN_BACKBONE_TYPE == 'gp':
+    pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz"
+    pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py"
+  else:
+    raise NotImplementedError("")  # PRETRAIN_BACKBONE_TYPE is neither 'base' nor 'gp'
+
+  return pretrained_backbone_configs
+
+
+def get_sweep(hyper):  # calls hyper.product with an empty list
+  return hyper.product([])
diff --git a/experimental/cityscapes/run_deterministic_splits_vm.sh b/experimental/cityscapes/run_deterministic_splits_vm.sh
new file mode 100755
index 000000000..1f4cdedf1
--- /dev/null
+++ b/experimental/cityscapes/run_deterministic_splits_vm.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# train cityscapes using segmenter with pretrained backbone
+# deterministic splits
+
+#declare -A configfiles=( [75]="experiments/splits/imagenet21k_segmenter_cityscapes75.py" ["sngp"]="experiments/imagenet21k_segmenter_cityscapes_sngp.py" ["scratch"]="experiments/segmenter_cityscapes.py")
+
+function get_config()
+{
+    local config_file_name="experiments/splits/imagenet21k_segmenter_cityscapes_$1_$2.py"
+    echo "$config_file_name"
+}
+
+num_cores=8
+tpu='local'
+use_gpu=False
+
+
+for rng_seed in 0
+do
+for train_mode in "deterministic" "gp"
+do
+for train_split in 75 50 25
+do
+config_file=$(get_config $train_mode $train_split) # or result=`myfunc`
+run_name="${train_mode}_split${train_split}_seed${rng_seed}"
+output_dir="gs://ub-ekb/segmenter/cityscapes/run_splits/${run_name}"
+echo "${output_dir} ${config_file}"
+python3 deterministic.py --output_dir=${output_dir} \
+        --num_cores=$num_cores \
+        --use_gpu=$use_gpu \
+        --config=${config_file} \
+        --config.rng_seed=${rng_seed} \
+        --tpu=$tpu
+done
+done
+done
+exit
From 6697ee35912d2b94bbcd5fdea6859767f3519554 Mon Sep 17 00:00:00 2001
From: ekellbuch
Date: Tue, 7 Dec 2021 14:12:38 +0000
Subject: [PATCH 027/150] add run with 10 and 100% of data'

---
 ...k_segmenter_cityscapes_deterministic_10.py | 173 ++++++++++++++++++
 ..._segmenter_cityscapes_deterministic_100.py | 173 ++++++++++++++++++
 .../imagenet21k_segmenter_cityscapes_gp_10.py | 173 ++++++++++++++++++
...imagenet21k_segmenter_cityscapes_gp_100.py | 173 ++++++++++++++++++ .../cityscapes/run_deterministic_splits_vm.sh | 2 +- 5 files changed, 693 insertions(+), 1 deletion(-) create mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py create mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_100.py create mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_10.py create mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_100.py diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py new file mode 100644 index 000000000..ad5ed55cd --- /dev/null +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py @@ -0,0 +1,173 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+
+"""
+# pylint: enable=line-too-long
+
+import ml_collections
+_CITYSCAPES_TRAIN_SIZE = 2975  # example count assigned to the full 'train' split below
+DEBUG = 0  # the only check in this file is `DEBUG ==5`
+
+TRAIN_PROP=10  # percentage of the train split this config selects
+
+# Map TRAIN_PROP (100/75/50/25/10) to a split string and its hard-coded example count; any other value leaves both names undefined.
+if TRAIN_PROP == 100:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE
+  train_split = 'train'
+elif TRAIN_PROP == 75:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231
+  train_split = 'train[:75%]'
+elif TRAIN_PROP == 50:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488
+  train_split = 'train[:50%]'
+elif TRAIN_PROP == 25:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 744
+  train_split = 'train[:25%]'
+elif TRAIN_PROP == 10:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 298
+  train_split = 'train[:10%]'
+
+target_size = (512, 512)
+LOAD_PRETRAINED_BACKBONE = True
+PRETRAIN_BACKBONE_TYPE = 'base'  # 'base' or 'gp'; picks the checkpoint in get_pretrained_backbone_config()
+
+STRIDE=16  # used for both entries of config.patches.size
+batch_size=8
+num_training_epochs = 100 # ml_collections.FieldReference(100)
+log_eval_steps = 200  # not read by get_config(), which derives config.log_eval_steps from steps_per_epoch
+
+mlp_dim = 3072
+num_heads = 12
+num_layers = 12
+hidden_size = 768
+
+if DEBUG ==5:
+  number_train_examples_debug = 16  # referenced only by commented-out lines in get_config()
+
+def get_config():
+  """Build and return the ConfigDict for cityscapes segmentation."""
+  config = ml_collections.ConfigDict()
+
+  config.experiment_name = 'cityscapes_segvit_ub_init'
+
+  #dataset
+  config.dataset_name = 'cityscapes'
+  config.dataset_configs = ml_collections.ConfigDict()
+  config.dataset_configs.target_size = target_size
+  config.dataset_configs.train_split = train_split
+
+  # flags to debug scenic on mac
+  #config.dataset_configs.number_train_examples_debug = number_train_examples_debug
+  #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug
+
+  # config following scenic
+  # model
+  config.model_name = 'segmenter_pretrained_mini'
+  config.model = ml_collections.ConfigDict()
+
+  config.patches = ml_collections.ConfigDict()
+  config.patches.size = (STRIDE, STRIDE)
+
+  config.backbone_configs = ml_collections.ConfigDict()
+  config.backbone_configs.type = 'vit'
+  config.backbone_configs.classifier = 'gap'
+  #config.backbone_configs.grid_size
+  config.backbone_configs.hidden_size = hidden_size
+  #config.backbone_configs.patches
+  #config.backbone_configs.representation_size = None
+
+  config.backbone_configs.attention_dropout_rate = 0.
+  config.backbone_configs.dropout_rate = 0.
+  config.backbone_configs.mlp_dim = mlp_dim
+  config.backbone_configs.num_heads = num_heads
+  config.backbone_configs.num_layers = num_layers
+
+  #decoder
+  config.decoder_configs = ml_collections.ConfigDict()
+  config.decoder_configs.type = 'linear'
+
+  # training
+  config.trainer_name = 'segvit_trainer'
+  config.optimizer = 'adam'
+  config.optimizer_configs = ml_collections.ConfigDict()
+  config.l2_decay_factor = 0.0
+  config.max_grad_norm = 1.0
+  config.label_smoothing = None
+  config.num_training_epochs = num_training_epochs
+  config.batch_size = batch_size
+  config.rng_seed = 0
+  config.focal_loss_gamma = 0.0
+
+  # learning rate
+  steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size  # floor division: a final partial batch is not counted
+  #steps_per_epoch = number_train_examples_debug // config.batch_size
+
+  # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine.
+  config.lr_configs = ml_collections.ConfigDict()
+  config.lr_configs.learning_rate_schedule = 'compound'
+  config.lr_configs.factors = 'constant * cosine_decay * linear_warmup'
+  config.lr_configs.warmup_steps = 1 * steps_per_epoch  # one epoch's worth of steps
+  config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch
+  config.lr_configs.base_learning_rate = 1e-4
+
+  # model and data dtype
+  config.model_dtype_str = 'float32'
+  config.data_dtype_str = 'float32'
+
+  # load pretrained backbone
+  config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE
+  config.pretrained_backbone_configs = get_pretrained_backbone_config(config)
+
+  #logging
+  config.write_summary = True # write TB and/or XM summary
+  config.write_xm_measurements = True # write XM measurements
+  #config.xprof = False # Profile using xprof
+  config.checkpoint = True # do checkpointing
+  config.checkpoint_steps = 5 * steps_per_epoch  # five epochs' worth of steps
+
+  config.debug_train = True # debug mode during training
+  config.debug_eval = True # debug mode during eval
+  config.log_eval_steps = 1 * steps_per_epoch # one epoch's worth of steps (the module-level log_eval_steps = 200 is not used)
+
+  # extra
+  config.args = {}
+
+  return config
+
+
+def get_pretrained_backbone_config(config):  # returns None when config.load_pretrained_backbone is falsy, else a ConfigDict
+  if not config.load_pretrained_backbone:
+    return None
+  pretrained_backbone_configs = ml_collections.ConfigDict()
+  pretrained_backbone_configs.checkpoint_format = "ub"
+  pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE
+
+  if PRETRAIN_BACKBONE_TYPE == 'base':
+    pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz"
+    pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py"
+  elif PRETRAIN_BACKBONE_TYPE == 'gp':
+    pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz"
+    pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py"
+  else:
+    raise NotImplementedError("")  # PRETRAIN_BACKBONE_TYPE is neither 'base' nor 'gp'
+
+  return pretrained_backbone_configs
+
+
+def get_sweep(hyper):  # calls hyper.product with an empty list
+  return hyper.product([])
diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_100.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_100.py
new file mode 100644
index 000000000..6f3ac1233
--- /dev/null
+++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_100.py
@@ -0,0 +1,173 @@
+# coding=utf-8
+# Copyright 2021 The Uncertainty Baselines Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# pylint: disable=line-too-long
+r"""Segmenter + cityscapes.
+
+"""
+# pylint: enable=line-too-long
+
+import ml_collections
+_CITYSCAPES_TRAIN_SIZE = 2975  # example count assigned to the full 'train' split below
+DEBUG = 0  # the only check in this file is `DEBUG ==5`
+
+TRAIN_PROP=100  # percentage of the train split this config selects
+
+# Map TRAIN_PROP (100/75/50/25/10) to a split string and its hard-coded example count; any other value leaves both names undefined.
+if TRAIN_PROP == 100:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE
+  train_split = 'train'
+elif TRAIN_PROP == 75:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231
+  train_split = 'train[:75%]'
+elif TRAIN_PROP == 50:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488
+  train_split = 'train[:50%]'
+elif TRAIN_PROP == 25:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 744
+  train_split = 'train[:25%]'
+elif TRAIN_PROP == 10:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 298
+  train_split = 'train[:10%]'
+
+target_size = (512, 512)
+LOAD_PRETRAINED_BACKBONE = True
+PRETRAIN_BACKBONE_TYPE = 'base'  # 'base' or 'gp'; picks the checkpoint in get_pretrained_backbone_config()
+
+STRIDE=16  # used for both entries of config.patches.size
+batch_size=8
+num_training_epochs = 100 # ml_collections.FieldReference(100)
+log_eval_steps = 200  # not read by get_config(), which derives config.log_eval_steps from steps_per_epoch
+
+mlp_dim = 3072
+num_heads = 12
+num_layers = 12
+hidden_size = 768
+
+if DEBUG ==5:
+  number_train_examples_debug = 16  # referenced only by commented-out lines in get_config()
+
+def get_config():
+  """Build and return the ConfigDict for cityscapes segmentation."""
+  config = ml_collections.ConfigDict()
+
+  config.experiment_name = 'cityscapes_segvit_ub_init'
+
+  #dataset
+  config.dataset_name = 'cityscapes'
+  config.dataset_configs = ml_collections.ConfigDict()
+  config.dataset_configs.target_size = target_size
+  config.dataset_configs.train_split = train_split
+
+  # flags to debug scenic on mac
+  #config.dataset_configs.number_train_examples_debug = number_train_examples_debug
+  #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug
+
+  # config following scenic
+  # model
+  config.model_name = 'segmenter_pretrained_mini'
+  config.model = ml_collections.ConfigDict()
+
+  config.patches = ml_collections.ConfigDict()
+  config.patches.size = (STRIDE, STRIDE)
+
+  config.backbone_configs = ml_collections.ConfigDict()
+  config.backbone_configs.type = 'vit'
+  config.backbone_configs.classifier = 'gap'
+  #config.backbone_configs.grid_size
+  config.backbone_configs.hidden_size = hidden_size
+  #config.backbone_configs.patches
+  #config.backbone_configs.representation_size = None
+
+  config.backbone_configs.attention_dropout_rate = 0.
+  config.backbone_configs.dropout_rate = 0.
+  config.backbone_configs.mlp_dim = mlp_dim
+  config.backbone_configs.num_heads = num_heads
+  config.backbone_configs.num_layers = num_layers
+
+  #decoder
+  config.decoder_configs = ml_collections.ConfigDict()
+  config.decoder_configs.type = 'linear'
+
+  # training
+  config.trainer_name = 'segvit_trainer'
+  config.optimizer = 'adam'
+  config.optimizer_configs = ml_collections.ConfigDict()
+  config.l2_decay_factor = 0.0
+  config.max_grad_norm = 1.0
+  config.label_smoothing = None
+  config.num_training_epochs = num_training_epochs
+  config.batch_size = batch_size
+  config.rng_seed = 0
+  config.focal_loss_gamma = 0.0
+
+  # learning rate
+  steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size  # floor division: a final partial batch is not counted
+  #steps_per_epoch = number_train_examples_debug // config.batch_size
+
+  # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine.
+  config.lr_configs = ml_collections.ConfigDict()
+  config.lr_configs.learning_rate_schedule = 'compound'
+  config.lr_configs.factors = 'constant * cosine_decay * linear_warmup'
+  config.lr_configs.warmup_steps = 1 * steps_per_epoch  # one epoch's worth of steps
+  config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch
+  config.lr_configs.base_learning_rate = 1e-4
+
+  # model and data dtype
+  config.model_dtype_str = 'float32'
+  config.data_dtype_str = 'float32'
+
+  # load pretrained backbone
+  config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE
+  config.pretrained_backbone_configs = get_pretrained_backbone_config(config)
+
+  #logging
+  config.write_summary = True # write TB and/or XM summary
+  config.write_xm_measurements = True # write XM measurements
+  #config.xprof = False # Profile using xprof
+  config.checkpoint = True # do checkpointing
+  config.checkpoint_steps = 5 * steps_per_epoch  # five epochs' worth of steps
+
+  config.debug_train = True # debug mode during training
+  config.debug_eval = True # debug mode during eval
+  config.log_eval_steps = 1 * steps_per_epoch # one epoch's worth of steps (the module-level log_eval_steps = 200 is not used)
+
+  # extra
+  config.args = {}
+
+  return config
+
+
+def get_pretrained_backbone_config(config):  # returns None when config.load_pretrained_backbone is falsy, else a ConfigDict
+  if not config.load_pretrained_backbone:
+    return None
+  pretrained_backbone_configs = ml_collections.ConfigDict()
+  pretrained_backbone_configs.checkpoint_format = "ub"
+  pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE
+
+  if PRETRAIN_BACKBONE_TYPE == 'base':
+    pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz"
+    pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py"
+  elif PRETRAIN_BACKBONE_TYPE == 'gp':
+    pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz"
+    pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py"
+  else:
+    raise NotImplementedError("")  # PRETRAIN_BACKBONE_TYPE is neither 'base' nor 'gp'
+
+  return pretrained_backbone_configs
+
+
+def get_sweep(hyper):  # calls hyper.product with an empty list
+  return hyper.product([])
diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_10.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_10.py
new file mode 100644
index 000000000..200a04c83
--- /dev/null
+++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_10.py
@@ -0,0 +1,173 @@
+# coding=utf-8
+# Copyright 2021 The Uncertainty Baselines Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# pylint: disable=line-too-long
+r"""Segmenter + cityscapes.
+
+"""
+# pylint: enable=line-too-long
+
+import ml_collections
+_CITYSCAPES_TRAIN_SIZE = 2975  # example count assigned to the full 'train' split below
+DEBUG = 0  # the only check in this file is `DEBUG ==5`
+
+TRAIN_PROP=10  # percentage of the train split this config selects
+
+# Map TRAIN_PROP (100/75/50/25/10) to a split string and its hard-coded example count; any other value leaves both names undefined.
+if TRAIN_PROP == 100:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE
+  train_split = 'train'
+elif TRAIN_PROP == 75:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231
+  train_split = 'train[:75%]'
+elif TRAIN_PROP == 50:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488
+  train_split = 'train[:50%]'
+elif TRAIN_PROP == 25:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 744
+  train_split = 'train[:25%]'
+elif TRAIN_PROP == 10:
+  _CITYSCAPES_TRAIN_SIZE_SPLIT = 298
+  train_split = 'train[:10%]'
+
+target_size = (512, 512)
+LOAD_PRETRAINED_BACKBONE = True
+PRETRAIN_BACKBONE_TYPE = 'gp'  # 'base' or 'gp'; picks the checkpoint in get_pretrained_backbone_config()
+
+STRIDE=16  # used for both entries of config.patches.size
+batch_size=8
+num_training_epochs = 100 # ml_collections.FieldReference(100)
+log_eval_steps = 200  # not read by get_config(), which derives config.log_eval_steps from steps_per_epoch
+
+mlp_dim = 3072
+num_heads = 12
+num_layers = 12
+hidden_size = 768
+
+if DEBUG ==5:
+  number_train_examples_debug = 16  # referenced only by commented-out lines in get_config()
+
+def get_config():
+  """Build and return the ConfigDict for cityscapes segmentation."""
+  config = ml_collections.ConfigDict()
+
+  config.experiment_name = 'cityscapes_segvit_ub_init'
+
+  #dataset
+  config.dataset_name = 'cityscapes'
+  config.dataset_configs = ml_collections.ConfigDict()
+  config.dataset_configs.target_size = target_size
+  config.dataset_configs.train_split = train_split
+
+  # flags to debug scenic on mac
+  #config.dataset_configs.number_train_examples_debug = number_train_examples_debug
+  #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug
+
+  # config following scenic
+  # model
+  config.model_name = 'segmenter_pretrained_mini'
+  config.model = ml_collections.ConfigDict()
+
+  config.patches = ml_collections.ConfigDict()
+  config.patches.size = (STRIDE, STRIDE)
+
+  config.backbone_configs = ml_collections.ConfigDict()
+  config.backbone_configs.type = 'vit'
+  config.backbone_configs.classifier = 'gap'
+  #config.backbone_configs.grid_size
+  config.backbone_configs.hidden_size = hidden_size
+  #config.backbone_configs.patches
+  #config.backbone_configs.representation_size = None
+
+  config.backbone_configs.attention_dropout_rate = 0.
+  config.backbone_configs.dropout_rate = 0.
+  config.backbone_configs.mlp_dim = mlp_dim
+  config.backbone_configs.num_heads = num_heads
+  config.backbone_configs.num_layers = num_layers
+
+  #decoder
+  config.decoder_configs = ml_collections.ConfigDict()
+  config.decoder_configs.type = 'linear'
+
+  # training
+  config.trainer_name = 'segvit_trainer'
+  config.optimizer = 'adam'
+  config.optimizer_configs = ml_collections.ConfigDict()
+  config.l2_decay_factor = 0.0
+  config.max_grad_norm = 1.0
+  config.label_smoothing = None
+  config.num_training_epochs = num_training_epochs
+  config.batch_size = batch_size
+  config.rng_seed = 0
+  config.focal_loss_gamma = 0.0
+
+  # learning rate
+  steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size  # floor division: a final partial batch is not counted
+  #steps_per_epoch = number_train_examples_debug // config.batch_size
+
+  # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine.
+  config.lr_configs = ml_collections.ConfigDict()
+  config.lr_configs.learning_rate_schedule = 'compound'
+  config.lr_configs.factors = 'constant * cosine_decay * linear_warmup'
+  config.lr_configs.warmup_steps = 1 * steps_per_epoch  # one epoch's worth of steps
+  config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch
+  config.lr_configs.base_learning_rate = 1e-4
+
+  # model and data dtype
+  config.model_dtype_str = 'float32'
+  config.data_dtype_str = 'float32'
+
+  # load pretrained backbone
+  config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE
+  config.pretrained_backbone_configs = get_pretrained_backbone_config(config)
+
+  #logging
+  config.write_summary = True # write TB and/or XM summary
+  config.write_xm_measurements = True # write XM measurements
+  #config.xprof = False # Profile using xprof
+  config.checkpoint = True # do checkpointing
+  config.checkpoint_steps = 5 * steps_per_epoch  # five epochs' worth of steps
+
+  config.debug_train = True # debug mode during training
+  config.debug_eval = True # debug mode during eval
+  config.log_eval_steps = 1 * steps_per_epoch # one epoch's worth of steps (the module-level log_eval_steps = 200 is not used)
+
+  # extra
+  config.args = {}
+
+  return config
+
+
+def get_pretrained_backbone_config(config):  # returns None when config.load_pretrained_backbone is falsy, else a ConfigDict
+  if not config.load_pretrained_backbone:
+    return None
+  pretrained_backbone_configs = ml_collections.ConfigDict()
+  pretrained_backbone_configs.checkpoint_format = "ub"
+  pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE
+
+  if PRETRAIN_BACKBONE_TYPE == 'base':
+    pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz"
+    pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py"
+  elif PRETRAIN_BACKBONE_TYPE == 'gp':
+    pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz"
+    pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py"
+  else:
+    raise NotImplementedError("")  # PRETRAIN_BACKBONE_TYPE is neither 'base' nor 'gp'
+
+  return pretrained_backbone_configs
+
+
+def get_sweep(hyper):  # calls hyper.product with an empty list
+  return hyper.product([])
diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_100.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_100.py
new file mode 100644
index 000000000..64b8351c5
--- /dev/null
+++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_100.py
@@ -0,0 +1,173 @@
+# coding=utf-8
+# Copyright 2021 The Uncertainty Baselines Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# pylint: disable=line-too-long
+r"""Segmenter + cityscapes.
+ +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +TRAIN_PROP=100 + +# we will have 4 version of train split +if TRAIN_PROP == 100: + _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE + train_split = 'train' +elif TRAIN_PROP == 75: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 + train_split = 'train[:75%]' +elif TRAIN_PROP == 50: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 + train_split = 'train[:50%]' +elif TRAIN_PROP == 25: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 + train_split = 'train[:25%]' +elif TRAIN_PROP == 10: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 + train_split = 'train[:10%]' + +target_size = (512, 512) +LOAD_PRETRAINED_BACKBONE = True +PRETRAIN_BACKBONE_TYPE = 'gp' + +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 + +if DEBUG ==5: + number_train_examples_debug = 16 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = train_split + + # flags to debug scenic on mac + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + config.backbone_configs.hidden_size = hidden_size + 
#config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/cityscapes/run_deterministic_splits_vm.sh b/experimental/cityscapes/run_deterministic_splits_vm.sh index 1f4cdedf1..2b4de57fa 100755 --- a/experimental/cityscapes/run_deterministic_splits_vm.sh +++ b/experimental/cityscapes/run_deterministic_splits_vm.sh @@ -20,7 +20,7 @@ for rng_seed in 0 do for train_mode in "deterministic" "gp" do -for train_split in 75 50 25 +for train_split in 10 100 75 50 25 do config_file=$(get_config $train_mode $train_split) # or result=`myfunc` run_name="${train_mode}_split${train_split}_seed${rng_seed}" From 6cc984ccc87fccf35cf4433070325725b9b66405 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 7 Dec 2021 09:46:30 -0500 Subject: [PATCH 028/150] fix bug with deterministic 10% train split, add config to train from scratch for different splits --- ...k_segmenter_cityscapes_deterministic_10.py | 2 +- ...enet21k_segmenter_cityscapes_scratch_10.py | 173 ++++++++++++++++++ ...net21k_segmenter_cityscapes_scratch_100.py | 173 ++++++++++++++++++ ...enet21k_segmenter_cityscapes_scratch_25.py | 173 ++++++++++++++++++ ...enet21k_segmenter_cityscapes_scratch_50.py | 173 ++++++++++++++++++ ...enet21k_segmenter_cityscapes_scratch_75.py | 173 ++++++++++++++++++ .../cityscapes/run_deterministic_splits_vm.sh | 2 +- 7 files changed, 867 insertions(+), 2 deletions(-) create mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_10.py create mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_100.py create mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_25.py create mode 100644 
experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_50.py create mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_75.py diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py index ad5ed55cd..17cc68516 100644 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py @@ -23,7 +23,7 @@ _CITYSCAPES_TRAIN_SIZE = 2975 DEBUG = 0 -TRAIN_PROP=25 +TRAIN_PROP=10 # we will have 4 version of train split if TRAIN_PROP == 10: diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_10.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_10.py new file mode 100644 index 000000000..7218642af --- /dev/null +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_10.py @@ -0,0 +1,173 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +TRAIN_PROP=10 + +# we will have 4 version of train split +if TRAIN_PROP == 10: + _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE + train_split = 'train' +elif TRAIN_PROP == 75: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 + train_split = 'train[:75%]' +elif TRAIN_PROP == 50: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 + train_split = 'train[:50%]' +elif TRAIN_PROP == 25: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 + train_split = 'train[:25%]' +elif TRAIN_PROP == 10: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 + train_split = 'train[:10%]' + +target_size = (512, 512) +LOAD_PRETRAINED_BACKBONE = False +PRETRAIN_BACKBONE_TYPE = 'base' + +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 + +if DEBUG ==5: + number_train_examples_debug = 16 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = train_split + + # flags to debug scenic on mac + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + config.backbone_configs.hidden_size = hidden_size + 
#config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_100.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_100.py new file mode 100644 index 000000000..527df645f --- /dev/null +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_100.py @@ -0,0 +1,173 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +TRAIN_PROP=100 + +# we will have 4 version of train split +if TRAIN_PROP == 100: + _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE + train_split = 'train' +elif TRAIN_PROP == 75: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 + train_split = 'train[:75%]' +elif TRAIN_PROP == 50: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 + train_split = 'train[:50%]' +elif TRAIN_PROP == 25: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 + train_split = 'train[:25%]' +elif TRAIN_PROP == 10: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 + train_split = 'train[:10%]' + +target_size = (512, 512) +LOAD_PRETRAINED_BACKBONE = False +PRETRAIN_BACKBONE_TYPE = 'base' + +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 + +if DEBUG ==5: + number_train_examples_debug = 16 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = train_split + + # flags to debug scenic on mac + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + config.backbone_configs.hidden_size = hidden_size + 
#config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_25.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_25.py new file mode 100644 index 000000000..ca3716e41 --- /dev/null +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_25.py @@ -0,0 +1,173 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +TRAIN_PROP=25 + +# we will have 4 version of train split +if TRAIN_PROP == 100: + _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE + train_split = 'train' +elif TRAIN_PROP == 75: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 + train_split = 'train[:75%]' +elif TRAIN_PROP == 50: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 + train_split = 'train[:50%]' +elif TRAIN_PROP == 25: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 + train_split = 'train[:25%]' +elif TRAIN_PROP == 10: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 + train_split = 'train[:10%]' + +target_size = (512, 512) +LOAD_PRETRAINED_BACKBONE = False +PRETRAIN_BACKBONE_TYPE = 'base' + +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 + +if DEBUG ==5: + number_train_examples_debug = 16 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = train_split + + # flags to debug scenic on mac + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + config.backbone_configs.hidden_size = hidden_size + 
#config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_50.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_50.py new file mode 100644 index 000000000..2f6a051ef --- /dev/null +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_50.py @@ -0,0 +1,173 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +TRAIN_PROP=50 + +# we will have 4 version of train split +if TRAIN_PROP == 100: + _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE + train_split = 'train' +elif TRAIN_PROP == 75: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 + train_split = 'train[:75%]' +elif TRAIN_PROP == 50: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 + train_split = 'train[:50%]' +elif TRAIN_PROP == 25: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 + train_split = 'train[:25%]' +elif TRAIN_PROP == 10: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 + train_split = 'train[:10%]' + +target_size = (512, 512) +LOAD_PRETRAINED_BACKBONE = False +PRETRAIN_BACKBONE_TYPE = 'base' + +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 + +if DEBUG ==5: + number_train_examples_debug = 16 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = train_split + + # flags to debug scenic on mac + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + config.backbone_configs.hidden_size = hidden_size + 
#config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_75.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_75.py new file mode 100644 index 000000000..e2b7bc519 --- /dev/null +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_75.py @@ -0,0 +1,173 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +TRAIN_PROP=75 + +# we will have 4 version of train split +if TRAIN_PROP == 100: + _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE + train_split = 'train' +elif TRAIN_PROP == 75: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 + train_split = 'train[:75%]' +elif TRAIN_PROP == 50: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 + train_split = 'train[:50%]' +elif TRAIN_PROP == 25: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 + train_split = 'train[:25%]' +elif TRAIN_PROP == 10: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 + train_split = 'train[:10%]' + +target_size = (512, 512) +LOAD_PRETRAINED_BACKBONE = False +PRETRAIN_BACKBONE_TYPE = 'base' + +STRIDE=16 +batch_size=8 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 3072 +num_heads = 12 +num_layers = 12 +hidden_size = 768 + +if DEBUG ==5: + number_train_examples_debug = 16 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = train_split + + # flags to debug scenic on mac + #config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + config.backbone_configs.hidden_size = hidden_size + 
#config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size + #steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" + pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/cityscapes/run_deterministic_splits_vm.sh b/experimental/cityscapes/run_deterministic_splits_vm.sh index 2b4de57fa..921822335 100755 --- a/experimental/cityscapes/run_deterministic_splits_vm.sh +++ b/experimental/cityscapes/run_deterministic_splits_vm.sh @@ -18,7 +18,7 @@ use_gpu=False for rng_seed in 0 do -for train_mode in "deterministic" "gp" +for train_mode in "scratch" "deterministic" "gp" do for train_split in 10 100 75 50 25 do From 51f0fe2ec359beae1ee147d6f644cf523461476e Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Fri, 10 Dec 2021 12:21:50 -0500 Subject: [PATCH 029/150] update experimental configs --- .../imagenet21k_segmenter_cityscapes_deterministic_10.py | 2 +- .../imagenet21k_segmenter_cityscapes_deterministic_100.py | 2 +- .../imagenet21k_segmenter_cityscapes_deterministic_25.py | 2 +- .../imagenet21k_segmenter_cityscapes_deterministic_50.py | 2 +- .../imagenet21k_segmenter_cityscapes_deterministic_75.py | 2 +- .../splits/imagenet21k_segmenter_cityscapes_gp_10.py | 2 +- .../splits/imagenet21k_segmenter_cityscapes_gp_100.py | 2 +- .../splits/imagenet21k_segmenter_cityscapes_gp_25.py | 2 +- .../splits/imagenet21k_segmenter_cityscapes_gp_50.py | 2 +- .../splits/imagenet21k_segmenter_cityscapes_gp_75.py | 2 +- .../splits/imagenet21k_segmenter_cityscapes_scratch_10.py | 4 ++-- .../splits/imagenet21k_segmenter_cityscapes_scratch_100.py | 2 +- .../splits/imagenet21k_segmenter_cityscapes_scratch_25.py | 2 +- .../splits/imagenet21k_segmenter_cityscapes_scratch_50.py | 2 +- .../splits/imagenet21k_segmenter_cityscapes_scratch_75.py | 2 +- 15 files changed, 16 insertions(+), 16 deletions(-) diff --git 
a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py index 17cc68516..37c473bed 100644 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py @@ -26,7 +26,7 @@ TRAIN_PROP=10 # we will have 4 version of train split -if TRAIN_PROP == 10: +if TRAIN_PROP == 100: _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE train_split = 'train' elif TRAIN_PROP == 75: diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_100.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_100.py index 6f3ac1233..aefdd5acc 100644 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_100.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_100.py @@ -25,7 +25,7 @@ TRAIN_PROP=100 -# we will have 4 version of train split +# we will have 5 version of train split if TRAIN_PROP == 100: _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE train_split = 'train' diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_25.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_25.py index f775673cd..2dc1acc70 100644 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_25.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_25.py @@ -25,7 +25,7 @@ TRAIN_PROP=25 -# we will have 4 version of train split +# we will have 5 version of train split if TRAIN_PROP == 100: _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE train_split = 'train' diff --git 
a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_50.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_50.py index f35771fad..32bcbad82 100644 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_50.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_50.py @@ -25,7 +25,7 @@ TRAIN_PROP=50 -# we will have 4 version of train split +# we will have 5 version of train split if TRAIN_PROP == 100: _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE train_split = 'train' diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_75.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_75.py index 592abc3e5..4a9524030 100644 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_75.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_75.py @@ -25,7 +25,7 @@ TRAIN_PROP=75 -# we will have 4 version of train split +# we will have 5 version of train split if TRAIN_PROP == 100: _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE train_split = 'train' diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_10.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_10.py index 200a04c83..eadb161b7 100644 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_10.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_10.py @@ -25,7 +25,7 @@ TRAIN_PROP=10 -# we will have 4 version of train split +# we will have 5 version of train split if TRAIN_PROP == 100: _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE train_split = 'train' diff --git 
a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_100.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_100.py index 64b8351c5..6d3543d5c 100644 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_100.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_100.py @@ -25,7 +25,7 @@ TRAIN_PROP=100 -# we will have 4 version of train split +# we will have 5 version of train split if TRAIN_PROP == 100: _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE train_split = 'train' diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_25.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_25.py index 84eab1871..541a1e070 100644 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_25.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_25.py @@ -25,7 +25,7 @@ TRAIN_PROP=25 -# we will have 4 version of train split +# we will have 5 version of train split if TRAIN_PROP == 100: _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE train_split = 'train' diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_50.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_50.py index 70d200f42..ab0e2f50a 100644 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_50.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_50.py @@ -25,7 +25,7 @@ TRAIN_PROP=50 -# we will have 4 version of train split +# we will have 5 version of train split if TRAIN_PROP == 100: _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE train_split = 'train' diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_75.py 
b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_75.py index c0d3ae11d..da1adb5da 100644 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_75.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_75.py @@ -25,7 +25,7 @@ TRAIN_PROP=75 -# we will have 4 version of train split +# we will have 5 version of train split if TRAIN_PROP == 100: _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE train_split = 'train' diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_10.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_10.py index 7218642af..bae6af57e 100644 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_10.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_10.py @@ -25,8 +25,8 @@ TRAIN_PROP=10 -# we will have 4 version of train split -if TRAIN_PROP == 10: +# we will have 5 version of train split +if TRAIN_PROP == 100: _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE train_split = 'train' elif TRAIN_PROP == 75: diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_100.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_100.py index 527df645f..615a5312c 100644 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_100.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_100.py @@ -25,7 +25,7 @@ TRAIN_PROP=100 -# we will have 4 version of train split +# we will have 5 version of train split if TRAIN_PROP == 100: _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE train_split = 'train' diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_25.py 
b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_25.py index ca3716e41..8e301c7f7 100644 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_25.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_25.py @@ -25,7 +25,7 @@ TRAIN_PROP=25 -# we will have 4 version of train split +# we will have 5 version of train split if TRAIN_PROP == 100: _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE train_split = 'train' diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_50.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_50.py index 2f6a051ef..9c65fbaa0 100644 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_50.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_50.py @@ -25,7 +25,7 @@ TRAIN_PROP=50 -# we will have 4 version of train split +# we will have 5 version of train split if TRAIN_PROP == 100: _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE train_split = 'train' diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_75.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_75.py index e2b7bc519..ea4030188 100644 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_75.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_75.py @@ -25,7 +25,7 @@ TRAIN_PROP=75 -# we will have 4 version of train split +# we will have 5 version of train split if TRAIN_PROP == 100: _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE train_split = 'train' From 1fa4602e503df63e78d04a1c0d68cc99e3ee6b96 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Fri, 10 Dec 2021 12:44:52 -0500 Subject: [PATCH 030/150] update script to run experiments --- 
.../cityscapes/run_deterministic_splits_vm.sh | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/experimental/cityscapes/run_deterministic_splits_vm.sh b/experimental/cityscapes/run_deterministic_splits_vm.sh index 921822335..082dc9b90 100755 --- a/experimental/cityscapes/run_deterministic_splits_vm.sh +++ b/experimental/cityscapes/run_deterministic_splits_vm.sh @@ -1,9 +1,6 @@ #!/bin/bash -# train cityscapes using segmenter with pretrained backbone -# deterministic splits - -#declare -A configfiles=( [75]="experiments/splits/imagenet21k_segmenter_cityscapes75.py" ["sngp"]="experiments/imagenet21k_segmenter_cityscapes_sngp.py" ["scratch"]="experiments/segmenter_cityscapes.py") +# train segmenter model on cityscapes using different pretrained backbones for different splits function get_config() { @@ -16,17 +13,17 @@ tpu='local' use_gpu=False -for rng_seed in 0 +for rng_seed in 0 1 2 3 do -for train_mode in "scratch" "deterministic" "gp" +for train_mode in "deterministic" "gp" "scratch" do -for train_split in 10 100 75 50 25 +for train_split in 100 75 50 25 10 do -config_file=$(get_config $train_mode $train_split) # or result=`myfunc` +config_file=$(get_config $train_mode $train_split) run_name="${train_mode}_split${train_split}_seed${rng_seed}" -output_dir="gs://ub-ekb/segmenter/cityscapes/run_splits/${run_name}" -echo "${output_dir} ${config_file}" -python3 deterministic.py --output_dir=${output_dir} \ +output_dir_ckpt="gs://ub-ekb/segmenter/cityscapes/run_splits1/checkpoints/${run_name}" +echo "Running experiment ${output_dir_ckpt}" +python3 deterministic.py --output_dir=${output_dir_ckpt} \ --num_cores=$num_cores \ --use_gpu=$use_gpu \ --config=${config_file} \ @@ -35,4 +32,3 @@ python3 deterministic.py --output_dir=${output_dir} \ done done done -exit From ef401ec8b0dca3c42586fac35dc00f9475c5f2ed Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Fri, 10 Dec 2021 18:38:11 -0500 Subject: [PATCH 031/150] fix local debugger for 
deterministic experiments --- experimental/cityscapes/call_deterministic.sh | 2 +- .../experiments/toy/segmenter_cityscapes.py | 161 ++++++++++++++++++ 2 files changed, 162 insertions(+), 1 deletion(-) create mode 100644 experimental/cityscapes/experiments/toy/segmenter_cityscapes.py diff --git a/experimental/cityscapes/call_deterministic.sh b/experimental/cityscapes/call_deterministic.sh index 23eb6dbac..ec73bb6bd 100755 --- a/experimental/cityscapes/call_deterministic.sh +++ b/experimental/cityscapes/call_deterministic.sh @@ -5,7 +5,7 @@ if [ "$(uname)" == "Darwin" ]; then echo "Debug On mac" # Do something under Mac OS X platform output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs" - config_file="experiments/splits/imagenet21k_segmenter_cityscapes.py" + config_file="experiments/toy/segmenter_cityscapes.py" num_cores=0 tpu='None' use_gpu=False diff --git a/experimental/cityscapes/experiments/toy/segmenter_cityscapes.py b/experimental/cityscapes/experiments/toy/segmenter_cityscapes.py new file mode 100644 index 000000000..badb16ebf --- /dev/null +++ b/experimental/cityscapes/experiments/toy/segmenter_cityscapes.py @@ -0,0 +1,161 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +#import get_fewshot # local file import + +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 1 +STRIDE = 4 + +target_size=(512, 512) +# debug on mac +if DEBUG == 1: + batch_size = 1 + number_train_examples_debug = 10 + number_eval_examples_debug = 10 + num_training_epochs = 1 # ml_collections.FieldReference(100) + log_eval_steps = 1 + + mlp_dim = 2 + num_heads = 1 + num_layers = 1 + hidden_size = 1 +# debug on v3-8: 1 epoch/16 samples/small vit +elif DEBUG == 2: + batch_size=8 + number_train_examples_debug = 16 + number_eval_examples_debug = 16 + num_training_epochs = 1 # ml_collections.FieldReference(100) + log_eval_steps = 1 + + mlp_dim = 2 + num_heads = 1 + num_layers = 1 + hidden_size = 1 +# debug on v3-8: 1 epoch/16 samples/regular vit +elif DEBUG == 3: + batch_size=8 + number_train_examples_debug = 16 + number_eval_examples_debug = 16 + num_training_epochs = 1 # ml_collections.FieldReference(100) + log_eval_steps = 1 + + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 +elif DEBUG == 4: + target_size =(128, 128) + STRIDE=16 + batch_size=8 + number_train_examples_debug = 16 + number_eval_examples_debug = 16 + num_training_epochs = 1 # ml_collections.FieldReference(100) + log_eval_steps = 1 + + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 + + +def get_config(): + """Config for a toy/debug cityscapes segmenter run; sizes are selected by DEBUG.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub' + + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + # flags to debug scenic on mac + config.dataset_configs.number_train_examples_debug = number_train_examples_debug + config.dataset_configs.number_eval_examples_debug = number_eval_examples_debug + + # config following scenic + config.num_classes = 19 + config.model_name = 'segmenter_mini' + config.model = 
ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.classifier = 'gap' + + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + config.backbone_configs.hidden_size = hidden_size + + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + #steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size + steps_per_epoch = number_train_examples_debug // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = log_eval_steps #200 + + # extra + config.args = {} + + return config + + +def get_sweep(hyper): + return hyper.product([]) \ No newline at end of file From a92a9c6ab988a8f35afb5fc2bb3759c22eb3ecb8 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 3 Jan 2022 23:30:13 -0500 Subject: [PATCH 032/150] updated readme --- experimental/cityscapes/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/experimental/cityscapes/README.md b/experimental/cityscapes/README.md index 2dcb695c5..9ee18a05c 100644 --- a/experimental/cityscapes/README.md +++ b/experimental/cityscapes/README.md @@ -1,7 +1,7 @@ # Log -[x] include loading cityscapes dataset -[x] include deterministic training for segmenter -[x] include init from pretrained backbone -[ ] include loading train split \ No newline at end of file +[x] add cityscapes dataset loader. +[x] add deterministic training for segmenter. +[x] include transfer learning option: init from pretrained backbone. +[x] include option to train vit+ model using different train split. 
From 52547804a012bcb93495199c92063a59bcb91784 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Thu, 6 Jan 2022 05:17:45 -0500 Subject: [PATCH 033/150] add eval code to store model outputs --- experimental/cityscapes/README.md | 4 + .../cityscapes/custom_segmentation_eval.py | 424 ++++++++++++++++++ experimental/cityscapes/deterministic_eval.py | 161 +++++++ .../cityscapes/run_deterministic_eval.sh | 33 ++ experimental/cityscapes/run_pretrained.sh | 2 +- 5 files changed, 623 insertions(+), 1 deletion(-) create mode 100644 experimental/cityscapes/custom_segmentation_eval.py create mode 100644 experimental/cityscapes/deterministic_eval.py create mode 100755 experimental/cityscapes/run_deterministic_eval.sh diff --git a/experimental/cityscapes/README.md b/experimental/cityscapes/README.md index 9ee18a05c..1a13454d0 100644 --- a/experimental/cityscapes/README.md +++ b/experimental/cityscapes/README.md @@ -5,3 +5,7 @@ [x] add deterministic training for segmenter. [x] include transfer learning option: init from pretrained backbone. [x] include option to train vit+ model using different train split. +[] add option to store logits from trained model. +[] use stored logits to calculate uncertainty as entropy of softmax distribution. 
+[] calculate uncertainty metrics + diff --git a/experimental/cityscapes/custom_segmentation_eval.py b/experimental/cityscapes/custom_segmentation_eval.py new file mode 100644 index 000000000..9782291e4 --- /dev/null +++ b/experimental/cityscapes/custom_segmentation_eval.py @@ -0,0 +1,424 @@ +""" +Custom segmentation_trainer.py + +# cannot import train_step, eval_step due to tuple segmenter output in ub implementation +Minor changes to account for ub models which output a tuple (logits, dict) +""" + +import functools +from typing import Any, Callable, Dict, Tuple, Optional, Type + +import flax.linen as nn +import jax +import jax.numpy as jnp +import jax.profiler +import ml_collections +import numpy as np +from absl import logging +from clu import metric_writers +from clu import periodic_actions +from flax import jax_utils +from jax.experimental.optimizers import clip_grads + +from scenic.dataset_lib import dataset_utils +from scenic.model_lib.base_models import base_model +from scenic.train_lib import lr_schedules +from scenic.train_lib import optimizers +from scenic.train_lib import train_utils +import h5py +import os +# instead of importing we use local functions +# from scenic.train_lib.segmentation_trainer import train_step, eval_step, _draw_side_by_side +Batch = Dict[str, jnp.ndarray] +MetricFn = Callable[[jnp.ndarray, Dict[str, jnp.ndarray]], + Dict[str, Tuple[float, int]]] +LossFn = Callable[[jnp.ndarray, Batch, Optional[jnp.ndarray]], float] + +from scenic.train_lib.segmentation_trainer import _draw_side_by_side, get_confusion_matrix +from flax.training.checkpoints import restore_checkpoint as flax_restore_checkpoint +from scenic.train_lib import pretrain_utils + +from pretrainer_utils import load_bb_config +from pathlib import Path + +def eval_step1( + *, + flax_model: nn.Module, + train_state: train_utils.TrainState, + batch: Batch, + metrics_fn: MetricFn, + debug: Optional[bool] = False +) -> Tuple[Batch, jnp.ndarray, jnp.ndarray, Dict[str, Tuple[float, int]], 
jnp.ndarray]: + """Runs a single step of evaluation. + + Note that in this code, the buffer of the second argument (batch) is donated + to the computation. + + Assumed API of metrics_fn is: + ```metrics = metrics_fn(logits, batch)``` + where batch is yielded by the batch iterator, and metrics is a dictionary + mapping metric name to a vector of per example measurements. eval_step will + aggregate (by summing) all per example measurements and divide by the + aggregated normalizers. For each given metric we compute: + 1/N sum_{b in batch_iter} metric(b), where N is the sum of normalizer + over all batches. + + Args: + flax_model: A Flax model. + train_state: TrainState, the state of training including the current + global_step, model_state, rng, and optimizer. The buffer of this argument + can be donated to the computation. + batch: A single batch of data; the 'inputs', 'label' and 'batch_mask' + entries are read here, and the whole batch is passed to metrics_fn. + metrics_fn: A metrics function, that given logits and batch of data, + calculates the metrics as well as the loss. + debug: Whether the debug mode is enabled during evaluation. + `debug=True` enables model specific logging/storing some values using + jax.host_callback. + + Returns: + Batch, logits, predictions, calculated metrics and confusion matrix. + """ + variables = { + 'params': train_state.optimizer.target, + **train_state.model_state + } + (logits, _) = flax_model.apply( + variables, batch['inputs'], train=False, mutable=False, debug=debug) + metrics = metrics_fn(logits, batch) + + confusion_matrix = get_confusion_matrix( + labels=batch['label'], logits=logits, batch_mask=batch['batch_mask']) + + # Collect predictions and batches from all hosts. 
+ predictions = jnp.argmax(logits, axis=-1) + predictions = jax.lax.all_gather(predictions, 'batch') + batch = jax.lax.all_gather(batch, 'batch') + confusion_matrix = jax.lax.all_gather(confusion_matrix, 'batch') + + return batch, logits, predictions, metrics, confusion_matrix + + +def eval1( + *, + rng: jnp.ndarray, + config: ml_collections.ConfigDict, + model_cls: Type[base_model.BaseModel], + dataset: dataset_utils.Dataset, + workdir: str, + writer: metric_writers.MetricWriter, +) -> Tuple[train_utils.TrainState, Dict[str, Any], Dict[str, Any]]: + """Main training loop lives in this function. + + Given the model class and dataset, it prepares the items needed to run the + training, including the TrainState. + + Args: + rng: Jax rng key. + config: Configurations of the experiment. + model_cls: Model class; A model has a flax_module, a loss_fn, and a + metrics_fn associated with it. + dataset: The dataset that has train_iter, eval_iter, meta_data, and + optionally, test_iter. + workdir: Directory for checkpointing. + writer: CLU metrics writer instance. + + Returns: + train_state that has the state of training (including current + global_step, model_state, rng, and the optimizer), train_summary + and eval_summary which are dict of metrics. These outputs are used for + regression testing. + + Timeline: + - Updated from scenic.train_lib.segmentation_trainer.train + """ + lead_host = jax.process_index() == 0 + # Build the loss_fn, metrics, and flax_model. + model = model_cls(config, dataset.meta_data) + + # Initialize model. + rng, init_rng = jax.random.split(rng) + (params, model_state, num_trainable_params, + gflops) = train_utils.initialize_model( + model_def=model.flax_model, + input_spec=[(dataset.meta_data['input_shape'], + dataset.meta_data.get('input_dtype', jnp.float32))], + config=config, + rngs=init_rng) + + # Create optimizer. + # We jit this, such that the arrays that are created are created on the same + # device as the input is, in this case the CPU. 
Else they'd be on device[0]. + optimizer = jax.jit( + optimizers.get_optimizer(config).create, backend='cpu')( + params) + rng, train_rng = jax.random.split(rng) + train_state = train_utils.TrainState( + global_step=0, + optimizer=optimizer, + model_state=model_state, + rng=train_rng, + accum_train_time=0) + start_step = train_state.global_step + + # Load pretrained backbone + if start_step == 0 and config.get('load_pretrained_backbone', False): + # TODO(kellybuchanan): check out partial loader in + # https://github.com/google/uncertainty-baselines/commit/083b1dcc52bb1964f8917d15552ece8848d582ae# + + bb_checkpoint_path = config.pretrained_backbone_configs.get('checkpoint_path') + checkpoint_format = config.pretrained_backbone_configs.get('checkpoint_format', 'ub') + # bb_model_cfg_file = config.pretrained_backbone_configs.get('checkpoint_cfg') + + # Loader from scenic + # cannot restore using flax + # Mathias suggested to try flax_restore_checkpoint + # bb_train_state = flax_restore_checkpoint(bb_checkpoint_path, target=None) + # but we get an error *** msgpack.exceptions.ExtraData: unpack(b) received extra data. + + # TODO(kellybuchanan): read config file directly from bb_model_cfg_file + restored_model_cfg = load_bb_config(config) + + if checkpoint_format == 'ub': + # import pdb; pdb.set_trace() + # load params from checkpoint + bb_train_state = pretrain_utils.convert_bigvision_to_scenic_checkpoint( + checkpoint_path=bb_checkpoint_path, + convert_to_linen=False) + + # option 1: failed as variables are a frozen dictionary + # could be used with flax.core.unfreeze, flax.core.freeze + train_state = model.init_backbone_from_train_state(train_state, + bb_train_state, + restored_model_cfg, + model_prefix_path=['backbone']) + + # option2: it fails for embeddings as this mode + # doesn't allow to specify loaded params . 
+ # model_prefix_path = ['backbone'] + # train_state = pretrain_utils.init_from_pretrain_state( + # train_state, bb_train_state, model_prefix_path=model_prefix_path) + + + else: + raise NotImplementedError(f"Unsupported checkpoint_format: {checkpoint_format!r}; only 'ub' is implemented.") + + elif start_step == 0: + logging.info('Not restoring from any pretrained_backbone.') + + if config.checkpoint: + train_state, start_step = train_utils.restore_checkpoint(workdir, train_state) + else: + logging.info('Not restoring from any checkpoints.') + + # Replicate the optimizer, state, and rng. + train_state = jax_utils.replicate(train_state) + del params # Do not keep a copy of the initial params. + + # Calculate the total number of training steps. + total_steps, steps_per_epoch = train_utils.get_num_training_steps( + config, dataset.meta_data) + # Get learning rate scheduler. + #learning_rate_fn = lr_schedules.get_learning_rate_fn(config) + + ############### EVALUATION CODE ################# + + eval_step_pmapped = jax.pmap( + functools.partial( + eval_step1, + flax_model=model.flax_model, + metrics_fn=model.get_metrics_fn('validation'), + debug=config.debug_eval), + axis_name='batch', + # We can donate the eval_batch's buffer. + ) + + # Ceil rounding such that we include the last incomplete batch.
+ total_eval_steps = int( + np.ceil(dataset.meta_data['num_eval_examples'] / config.batch_size)) + steps_per_eval = config.get('steps_per_eval') or total_eval_steps + + batch_size = config.batch_size + num_eval_examples = dataset.meta_data['num_eval_examples'] + + store_logits_fname = os.path.join(workdir, "logits", "val.h5py") + + if not Path(store_logits_fname).parent.exists(): + os.makedirs(str(Path(store_logits_fname).parent)) + #import pdb;pdb.set_trace() + # assert not os.path.isfile(store_logits_fname) + # with h5py.File(args.store_logits_fname, 'w', libver='latest', swmr=True) as f: + f = h5py.File(store_logits_fname, 'w', libver='latest') + # f.swmr_mode = True # single write multi-read + input_shape = dataset.meta_data['input_shape'][1:3] + num_classes = dataset.meta_data['num_classes'] + logits_out = f.create_dataset('logits', (num_eval_examples,) + input_shape + (num_classes,)) + inputs_out = f.create_dataset('inputs', (num_eval_examples,) + input_shape + (3,)) + labels_out = f.create_dataset('labels', (num_eval_examples,) + input_shape) + predictions_out = f.create_dataset('predictions', (num_eval_examples,) + input_shape) + + def evaluate(train_state: train_utils.TrainState, + step: int) -> Dict[str, Any]: + eval_metrics = [] + eval_all_confusion_mats = [] + # Sync model state across replicas. + train_state = train_utils.sync_model_state_across_replicas(train_state) + + def to_cpu(x): + return jax.device_get(dataset_utils.unshard(jax_utils.unreplicate(x))) + + for step_ in range(steps_per_eval): + eval_batch = next(dataset.valid_iter) + e_batch, \ + e_logits, \ + e_predictions, \ + e_metrics, \ + confusion_matrix = eval_step_pmapped(train_state=train_state, batch=eval_batch) + + eval_metrics.append(train_utils.unreplicate_and_get(e_metrics)) + # Evaluate global metrics on one of the hosts (lead_host), but given + # intermediate values collected from all hosts. 
+ if lead_host and global_metrics_fn is not None: + # Collect data to be sent for computing global metrics. + eval_all_confusion_mats.append(to_cpu(confusion_matrix)) + + #import pdb;pdb.set_trace() + # store logits, wbu batch_size + start_idx = step_ * batch_size + end_idx = start_idx + batch_size + inputs_out[start_idx:end_idx] = to_cpu(e_batch)['inputs'] + labels_out[start_idx:end_idx] = to_cpu(e_batch)['label'] + logits_out[start_idx:end_idx] = to_cpu(e_logits) + predictions_out[start_idx:end_idx] = to_cpu(e_predictions) + + eval_global_metrics_summary = {} + if lead_host and global_metrics_fn is not None: + eval_global_metrics_summary = global_metrics_fn(eval_all_confusion_mats, + dataset.meta_data) + + ############### LOG EVAL SUMMARY ############### + + """ + eval_summary = train_utils.log_eval_summary( + step=step, + eval_metrics=eval_metrics, + extra_eval_summary=eval_global_metrics_summary, + # writer=writer + ) + # Visualize val predictions for one batch: + if lead_host: + images = _draw_side_by_side(to_cpu(e_batch), to_cpu(e_predictions)) + example_viz = { + f'val/example_{i}': image[None, ...] 
for i, image in enumerate(images) + } + writer.write_images(step, example_viz) + + writer.flush() + """ + eval_summary = 0 + del eval_metrics + return eval_summary + + log_eval_steps = config.get('log_eval_steps') or steps_per_epoch + if not log_eval_steps: + raise ValueError("'log_eval_steps' should be specified in the config.") + log_summary_steps = config.get('log_summary_steps') or log_eval_steps + checkpoint_steps = config.get('checkpoint_steps') or log_eval_steps + + train_metrics, extra_training_logs = [], [] + train_summary, eval_summary = None, None + global_metrics_fn = model.get_global_metrics_fn() # pytype: disable=attribute-error + + chrono = train_utils.Chrono( + first_step=start_step, + total_steps=total_steps, + steps_per_epoch=steps_per_epoch, + global_bs=config.batch_size, + accum_train_time=int(jax_utils.unreplicate(train_state.accum_train_time))) + + logging.info('Starting training loop at step %d.', start_step + 1) + report_progress = periodic_actions.ReportProgress( + num_train_steps=total_steps, + #writer=writer + ) + hooks = [report_progress] + if config.get('xprof', True) and lead_host: + hooks.append(periodic_actions.Profile(num_profile_steps=5, logdir=workdir)) + + if start_step == 0: + raise NotImplementedError("start step should be larger") + step0_log = {'num_trainable_params': num_trainable_params} + if gflops: + step0_log['gflops'] = gflops + writer.write_scalars(1, step0_log) + + for step in range(start_step + 1, total_steps + 2): + with jax.profiler.StepTraceContext('train', step_num=step): + train_batch = next(dataset.train_iter) + + # train_state, t_metrics, lr, train_predictions = train_step_pmapped( + # train_state=train_state, batch=train_batch) + # This will accumulate metrics in TPU memory up to the point that we log + # them. This is no problem for small metrics but may be a problem for + # large (e.g. segmentation) metrics.
An alternative is to set + # `log_summary_steps` to a small number, or to use + # `train_utils.unreplicate_and_get` here instead of right before writing + # summaries, but that means in each step, we have data transfer between + # tpu and host, which might slow down the training. + # train_metrics.append(t_metrics) + # Additional training logs: learning rate: + # extra_training_logs.append({'learning_rate': lr}) + + for h in hooks: + h(step) + chrono.pause() # Below are once-in-a-while ops -> pause. + """ + if step % log_summary_steps == 0 or (step == total_steps): + ############### LOG TRAIN SUMMARY ############### + if lead_host: + chrono.tick(step, writer=writer) + # Visualize segmentations using side-by-side gt-pred images: + images = _draw_side_by_side( + jax.device_get(dataset_utils.unshard(train_batch)), + jax.device_get(dataset_utils.unshard(train_predictions))) + example_viz = { + f'train/example_{i}': image[None, ...] + for i, image in enumerate(images) + } + writer.write_images(step, example_viz) + + train_summary = train_utils.log_train_summary( + step=step, + train_metrics=jax.tree_map(train_utils.unreplicate_and_get, + train_metrics), + extra_training_logs=jax.tree_map(train_utils.unreplicate_and_get, + extra_training_logs), + writer=writer) + # Reset metric accumulation for next evaluation cycle. + train_metrics, extra_training_logs = [], [] + """ + if (step % log_eval_steps == 0) or (step == total_steps): + with report_progress.timed('eval'): + # Sync model state across replicas (in case of having model state, e.g. + # batch statistic when using batch norm). + train_state = train_utils.sync_model_state_across_replicas(train_state) + eval_summary = evaluate(train_state, step) + """ + if ((step % checkpoint_steps == 0 and step > 0) or + (step == total_steps)) and config.checkpoint: + ################### CHECK POINTING ########################## + with report_progress.timed('checkpoint'): + # Sync model state across replicas. 
+ train_state = train_utils.sync_model_state_across_replicas(train_state) + if lead_host: + train_state.replace( # pytype: disable=attribute-error + accum_train_time=chrono.accum_train_time) + train_utils.save_checkpoint(workdir, train_state) + """ + chrono.resume() # Un-pause now. + + f.close() + # Wait until computations are done before exiting. + jax.random.normal(jax.random.PRNGKey(0), ()).block_until_ready() + # Return the train and eval summary after last step for regresesion testing. + return train_state, train_summary, eval_summary diff --git a/experimental/cityscapes/deterministic_eval.py b/experimental/cityscapes/deterministic_eval.py new file mode 100644 index 000000000..87c8a2dbd --- /dev/null +++ b/experimental/cityscapes/deterministic_eval.py @@ -0,0 +1,161 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Train vit model on cityscapes. 
+ +Step 1: aim to train model on cityscapes for 1 step +# Runs with + +""" + +import os +import sys + +# %% +import jax +# %% +import tensorflow as tf +# %% +from absl import app +from absl import flags +from absl import logging +from ml_collections.config_flags import config_flags +from tensorflow.io import gfile + +import custom_models +import custom_segmentation_eval +# scenic dependencies for debugging +from scenic.train_lib import train_utils + +# import train_utils # local file import + +#%% +config_flags.DEFINE_config_file( + 'config', None, 'Training configuration.', lock_config=True) +flags.DEFINE_string('output_dir', default=None, help='Work unit directory.') +flags.DEFINE_integer( + 'num_cores', default=None, help='Unused. How many devices being used.') +flags.DEFINE_boolean( + 'use_gpu', default=None, help='Unused. Whether or not running on GPU.') +flags.DEFINE_string('tpu', None, + 'Unused. Name of the TPU. Only used if use_gpu is False.') +flags.DEFINE_string('dataset_service_address', None, + 'Address of the tf.data service') +FLAGS = flags.FLAGS + + +def write_note(note): + if jax.process_index() == 0: + logging.info('NOTE: %s', note) + + +from clu import metric_writers + + +def run(config, workdir): + """Prepares model, and dataset for training. + + This creates summary directories, summary writers, model definition, and + builds datasets to be sent to the main training script. + + Args: + config: ConfigDict; Hyper parameters. + workdir: string; Root directory for the experiment. + + Returns: + The outputs of trainer.train(), which are train_state, train_summary, and + eval_summary. 
+ """ + lead_host = jax.process_index() == 0 + # set up the train_dir and log_dir + gfile.makedirs(workdir) + #workdir = os.path.join(workdir, 'trial') + #gfile.makedirs(workdir) + + summary_writer = None + if lead_host and config.write_summary: + tensorboard_dir = os.path.join(workdir, 'tb_summaries') + gfile.makedirs(tensorboard_dir) + # summary_writer = tensorboard.SummaryWriter(tensorboard_dir) + summary_writer = metric_writers.SummaryWriter(tensorboard_dir) + + device_count = jax.device_count() + logging.info('device_count: %d', device_count) + logging.info('num_hosts : %d', jax.process_count()) + logging.info('host_id : %d', jax.process_index()) + + rng = jax.random.PRNGKey(config.rng_seed) + logging.info('rng: %s', rng) + + model_cls = custom_models.SegmenterSegmentationModel + + # ---------------------- + # Load dataset + # ---------------------- + data_rng, rng = jax.random.split(rng) + # set resource limit to debug in mac osx (see https://github.com/tensorflow/datasets/issues/1441) + if jax.process_index() == 0 and sys.platform == 'darwin': + import resource + low, high = resource.getrlimit(resource.RLIMIT_NOFILE) + resource.setrlimit(resource.RLIMIT_NOFILE, (high, high)) + write_note('Loading dataset...') + + # TODO: update num_classes + dataset = train_utils.get_dataset( + config, data_rng, dataset_service_address=FLAGS.dataset_service_address) + + return rng, model_cls, dataset, config, workdir, summary_writer + + +def main(config, output_dir): + + print('config') + print(config) + seed = config.get('rng_seed', 0) + rng = jax.random.PRNGKey(seed) + tf.random.set_seed(seed) + + print('workdir ', output_dir) + rng, model_cls, dataset, config, workdir, summary_writer = run(config, output_dir) + print('workdir ', workdir) + + # ---------------------- + # Eval function + # ---------------------- + eval_fn = custom_segmentation_eval.eval1 + + train_state, train_summary, eval_summary = eval_fn(rng=rng, model_cls=model_cls, dataset=dataset, + config=config, + 
workdir=output_dir, writer=summary_writer) + + print(train_summary) + return + + +if __name__ == '__main__': + # Adds jax flags to the program. + jax.config.config_with_absl() + + # TODO(dusenberrymw): Refactor `main` such that there is a `train_eval` + # function that returns values for tests and does not directly access flags, + # and then have `main` return None. + + def _main(unused_argv): + config = FLAGS.config + output_dir = FLAGS.output_dir + main(config, output_dir) + + app.run(_main) # Ignore the returned values from `main`. \ No newline at end of file diff --git a/experimental/cityscapes/run_deterministic_eval.sh b/experimental/cityscapes/run_deterministic_eval.sh new file mode 100755 index 000000000..3e7512313 --- /dev/null +++ b/experimental/cityscapes/run_deterministic_eval.sh @@ -0,0 +1,33 @@ +#!/bin/sh + +# train cityscapes using segmenter with pretrained backbone +# supports 2 options to + +if [ "$(uname)" = "Darwin" ] ; then + # Do something under Mac OS X platform + config_file='experiments/imagenet21k_segmenter_cityscapes1.py' + output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs" + num_cores=0 + tpu=False + use_gpu=False + python deterministic_eval.py --output_dir=${output_dir} \ + --num_cores=$num_cores \ + --use_gpu=$use_gpu \ + --config=${config_file} \ + # --tpu=$tpu +elif [ "$(uname)" = "Linux" ]; then + echo "in Linux" + config_file='experiments/imagenet21k_segmenter_cityscapes13.py' + output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs13" + num_cores=8 + tpu='local' + use_gpu=False + python3 deterministic.py --output_dir=${output_dir} \ + --num_cores=$num_cores \ + --use_gpu=$use_gpu \ + --config=${config_file} \ + --config.batch_size=${batch_size} \ + --tpu=$tpu +fi + +#%% diff --git a/experimental/cityscapes/run_pretrained.sh b/experimental/cityscapes/run_pretrained.sh index 049d5dc98..6d9fc616a 100755 --- a/experimental/cityscapes/run_pretrained.sh +++ 
b/experimental/cityscapes/run_pretrained.sh @@ -6,7 +6,7 @@ if [ "$(uname)" = "Darwin" ] ; then # Do something under Mac OS X platform - config_file='experiments/imagenet21k_segmenter_cityscapes12.py' + config_file='experiments/imagenet21k_segmenter_cityscapes1.py' output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs" num_cores=0 tpu=False From 9afb73f6b26af3f1fd3a2b9956b19410dcdc8aa0 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Thu, 6 Jan 2022 05:24:38 -0500 Subject: [PATCH 034/150] add call to eval multiple splits --- .../run_deterministic_eval_splits_vm.sh | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 experimental/cityscapes/run_deterministic_eval_splits_vm.sh diff --git a/experimental/cityscapes/run_deterministic_eval_splits_vm.sh b/experimental/cityscapes/run_deterministic_eval_splits_vm.sh new file mode 100644 index 000000000..0614f9698 --- /dev/null +++ b/experimental/cityscapes/run_deterministic_eval_splits_vm.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# train segmenter model on cityscapes using different pretrained backbones for different splits + +function get_config() +{ + local config_file_name="experiments/splits/imagenet21k_segmenter_cityscapes_$1_$2.py" + echo "$config_file_name" +} + +num_cores=8 +tpu='local' +use_gpu=False + + +for rng_seed in 0 1 2 3 +do +for train_mode in "deterministic" "gp" "scratch" +do +for train_split in 100 75 50 25 10 +do +config_file=$(get_config $train_mode $train_split) +run_name="${train_mode}_split${train_split}_seed${rng_seed}" +output_dir_ckpt="gs://ub-ekb/segmenter/cityscapes/run_splits1/checkpoints/${run_name}" +echo "Running experiment ${output_dir_ckpt}" +python3 deterministic_eval.py --output_dir=${output_dir_ckpt} \ + --num_cores=$num_cores \ + --use_gpu=$use_gpu \ + --config=${config_file} \ + --config.rng_seed=${rng_seed} \ + --tpu=$tpu +done +done +done From e4a1a6ee46d0283e84bb9f91cf80137cc6bd480a Mon Sep 17 00:00:00 2001 From: 
ekellbuch Date: Thu, 6 Jan 2022 11:21:37 +0000 Subject: [PATCH 035/150] update logit dimensionality --- .../cityscapes/custom_segmentation_eval.py | 22 +++++++++++-------- .../cityscapes/run_deterministic_splits_vm.sh | 2 +- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/experimental/cityscapes/custom_segmentation_eval.py b/experimental/cityscapes/custom_segmentation_eval.py index 9782291e4..fee8a6a83 100644 --- a/experimental/cityscapes/custom_segmentation_eval.py +++ b/experimental/cityscapes/custom_segmentation_eval.py @@ -96,6 +96,8 @@ def eval_step1( batch = jax.lax.all_gather(batch, 'batch') confusion_matrix = jax.lax.all_gather(confusion_matrix, 'batch') + logits = jax.lax.all_gather(logits, 'batch') + return batch, logits, predictions, metrics, confusion_matrix @@ -239,8 +241,10 @@ def eval1( steps_per_eval = config.get('steps_per_eval') or total_eval_steps batch_size = config.batch_size - num_eval_examples = dataset.meta_data['num_eval_examples'] + #num_eval_examples = dataset.meta_data['num_eval_examples'] + num_eval_examples = int(steps_per_eval * config.batch_size) + # TODO(kellbuchanan): add compatibility w gcp bucket store_logits_fname = os.path.join(workdir, "logits", "val.h5py") if not Path(store_logits_fname).parent.exists(): @@ -255,7 +259,7 @@ def eval1( logits_out = f.create_dataset('logits', (num_eval_examples,) + input_shape + (num_classes,)) inputs_out = f.create_dataset('inputs', (num_eval_examples,) + input_shape + (3,)) labels_out = f.create_dataset('labels', (num_eval_examples,) + input_shape) - predictions_out = f.create_dataset('predictions', (num_eval_examples,) + input_shape) + #predictions_out = f.create_dataset('predictions', (num_eval_examples,) + input_shape) def evaluate(train_state: train_utils.TrainState, step: int) -> Dict[str, Any]: @@ -289,7 +293,7 @@ def to_cpu(x): inputs_out[start_idx:end_idx] = to_cpu(e_batch)['inputs'] labels_out[start_idx:end_idx] = to_cpu(e_batch)['label'] logits_out[start_idx:end_idx] 
= to_cpu(e_logits) - predictions_out[start_idx:end_idx] = to_cpu(e_predictions) + #predictions_out[start_idx:end_idx] = to_cpu(e_predictions) eval_global_metrics_summary = {} if lead_host and global_metrics_fn is not None: @@ -397,12 +401,12 @@ def to_cpu(x): # Reset metric accumulation for next evaluation cycle. train_metrics, extra_training_logs = [], [] """ - if (step % log_eval_steps == 0) or (step == total_steps): - with report_progress.timed('eval'): - # Sync model state across replicas (in case of having model state, e.g. - # batch statistic when using batch norm). - train_state = train_utils.sync_model_state_across_replicas(train_state) - eval_summary = evaluate(train_state, step) + #if (step % log_eval_steps == 0) or (step == total_steps): + with report_progress.timed('eval'): + # Sync model state across replicas (in case of having model state, e.g. + # batch statistic when using batch norm). + train_state = train_utils.sync_model_state_across_replicas(train_state) + eval_summary = evaluate(train_state, step) """ if ((step % checkpoint_steps == 0 and step > 0) or (step == total_steps)) and config.checkpoint: diff --git a/experimental/cityscapes/run_deterministic_splits_vm.sh b/experimental/cityscapes/run_deterministic_splits_vm.sh index 082dc9b90..fefeb6ba7 100755 --- a/experimental/cityscapes/run_deterministic_splits_vm.sh +++ b/experimental/cityscapes/run_deterministic_splits_vm.sh @@ -13,7 +13,7 @@ tpu='local' use_gpu=False -for rng_seed in 0 1 2 3 +for rng_seed in 4 do for train_mode in "deterministic" "gp" "scratch" do From 7f916b0e57c3edba09b293c29930842e734bec74 Mon Sep 17 00:00:00 2001 From: ekellbuch Date: Thu, 6 Jan 2022 17:11:27 +0000 Subject: [PATCH 036/150] update eval code to write directly to bucket --- .../cityscapes/custom_segmentation_eval.py | 16 +++++++++------- .../run_deterministic_eval_splits_vm.sh | 10 +++++++++- 2 files changed, 18 insertions(+), 8 deletions(-) mode change 100644 => 100755 
experimental/cityscapes/run_deterministic_eval_splits_vm.sh diff --git a/experimental/cityscapes/custom_segmentation_eval.py b/experimental/cityscapes/custom_segmentation_eval.py index fee8a6a83..d0a08c662 100644 --- a/experimental/cityscapes/custom_segmentation_eval.py +++ b/experimental/cityscapes/custom_segmentation_eval.py @@ -246,9 +246,11 @@ def eval1( # TODO(kellbuchanan): add compatibility w gcp bucket store_logits_fname = os.path.join(workdir, "logits", "val.h5py") - - if not Path(store_logits_fname).parent.exists(): - os.makedirs(str(Path(store_logits_fname).parent)) + store_logits_fname = "logits/val.h5py" + #store_logits_fname = workdir + "/logits/val.h5py" + + #if not Path(store_logits_fname).parent.exists(): + # os.makedirs(str(Path(store_logits_fname).parent)) #import pdb;pdb.set_trace() # assert not os.path.isfile(store_logits_fname) # with h5py.File(args.store_logits_fname, 'w', libver='latest', swmr=True) as f: @@ -257,8 +259,8 @@ def eval1( input_shape = dataset.meta_data['input_shape'][1:3] num_classes = dataset.meta_data['num_classes'] logits_out = f.create_dataset('logits', (num_eval_examples,) + input_shape + (num_classes,)) - inputs_out = f.create_dataset('inputs', (num_eval_examples,) + input_shape + (3,)) - labels_out = f.create_dataset('labels', (num_eval_examples,) + input_shape) + #inputs_out = f.create_dataset('inputs', (num_eval_examples,) + input_shape + (3,)) + #labels_out = f.create_dataset('labels', (num_eval_examples,) + input_shape) #predictions_out = f.create_dataset('predictions', (num_eval_examples,) + input_shape) def evaluate(train_state: train_utils.TrainState, @@ -290,8 +292,8 @@ def to_cpu(x): # store logits, wbu batch_size start_idx = step_ * batch_size end_idx = start_idx + batch_size - inputs_out[start_idx:end_idx] = to_cpu(e_batch)['inputs'] - labels_out[start_idx:end_idx] = to_cpu(e_batch)['label'] + #inputs_out[start_idx:end_idx] = to_cpu(e_batch)['inputs'] + #labels_out[start_idx:end_idx] = 
to_cpu(e_batch)['label'] logits_out[start_idx:end_idx] = to_cpu(e_logits) #predictions_out[start_idx:end_idx] = to_cpu(e_predictions) diff --git a/experimental/cityscapes/run_deterministic_eval_splits_vm.sh b/experimental/cityscapes/run_deterministic_eval_splits_vm.sh old mode 100644 new mode 100755 index 0614f9698..25a565d2e --- a/experimental/cityscapes/run_deterministic_eval_splits_vm.sh +++ b/experimental/cityscapes/run_deterministic_eval_splits_vm.sh @@ -27,8 +27,16 @@ python3 deterministic_eval.py --output_dir=${output_dir_ckpt} \ --num_cores=$num_cores \ --use_gpu=$use_gpu \ --config=${config_file} \ - --config.rng_seed=${rng_seed} \ --tpu=$tpu + #--config.rng_seed=${rng_seed} \ + #--tpu=$tpu + +output_dir_logit="${output_dir_ckpt}/logits" +echo "copy logits to bucket" +gsutil cp -r "logits/val.h5py" "${output_dir_logit}/val.h5py" +echo "remove logits_file" +rm "logits/val.h5py" + done done done From ba645225a191ea47f97437da87bffa62f86771e5 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 11 Jan 2022 00:39:54 -0500 Subject: [PATCH 037/150] add uncertainty calculations during model eval --- experimental/cityscapes/README.md | 5 +- experimental/cityscapes/custom_models.py | 40 ++++++- .../cityscapes/custom_segmentation_eval.py | 111 ++++++++++++------ experimental/cityscapes/deterministic_eval.py | 2 +- .../cityscapes/uncertainty_metrics.py | 97 +++++++++++++++ 5 files changed, 212 insertions(+), 43 deletions(-) create mode 100644 experimental/cityscapes/uncertainty_metrics.py diff --git a/experimental/cityscapes/README.md b/experimental/cityscapes/README.md index 1a13454d0..8e1e33393 100644 --- a/experimental/cityscapes/README.md +++ b/experimental/cityscapes/README.md @@ -5,7 +5,6 @@ [x] add deterministic training for segmenter. [x] include transfer learning option: init from pretrained backbone. [x] include option to train vit+ model using different train split. -[] add option to store logits from trained model. 
-[] used stored logits to calculate uncertainty as entropy of softmax distribution. -[] calculate uncertainty metrics +[x] add pavpu metric +[x] calculate uncertainty metrics diff --git a/experimental/cityscapes/custom_models.py b/experimental/cityscapes/custom_models.py index 47afeabb2..63015c3fe 100644 --- a/experimental/cityscapes/custom_models.py +++ b/experimental/cityscapes/custom_models.py @@ -10,13 +10,34 @@ import scipy from absl import logging -from scenic.model_lib.base_models.segmentation_model import SegmentationModel +from scenic.model_lib.base_models.segmentation_model import SegmentationModel, \ + semantic_segmentation_metrics_function, _SEMANTIC_SEGMENTATION_METRICS, num_pixels from scenic.train_lib import train_utils from uncertainty_baselines.models.segmenter import SegVit +from immutabledict import immutabledict + +from scenic.model_lib.base_models import base_model +from scenic.model_lib.base_models import model_utils # JAX team is working on type annotation for pytree: # https://github.com/google/jax/issues/1555 PyTree = Union[Mapping[str, Mapping], Any] +import functools + +from uncertainty_metrics import calculate_pavpu + +# Standard default metrics for the semantic segmentation models. +_SEMANTIC_SEGMENTATION_METRICS_UNC = immutabledict({ + 'accuracy': (model_utils.weighted_correctly_classified, num_pixels), + + # The loss is already normalized, so we set num_pixels to 1.0: + 'loss': (model_utils.weighted_softmax_cross_entropy, lambda *a, **kw: 1.0), + + # The pavpu is already normalized, so we set num_pixels to 1.0: + 'pavpu': (calculate_pavpu, lambda *a, **kw: 1.0), + +}) + class SegmenterSegmentationModel(SegmentationModel): @@ -132,6 +153,23 @@ def init_backbone_from_train_state( model_state=model_state) return train_state + def get_metrics_fn_unc(self, split: Optional[str] = None) -> base_model.MetricFn: + """Returns a callable metric function for the model. + + Edited from get_metrics_fn to support additional uncertainty metrics. 
+ + Args: + split: The split for which we calculate the metrics. It should be one of + the ['train', 'validation', 'test']. + Returns: A metric function with the following API: ```metrics_fn(logits, + batch)``` + """ + del split # For all splits, we return the same metric functions. + return functools.partial( + semantic_segmentation_metrics_function, + target_is_onehot=self.dataset_meta_data.get('target_is_onehot', False), + metrics=_SEMANTIC_SEGMENTATION_METRICS_UNC) + def _replace_dict(model: PyTree, restored: PyTree, diff --git a/experimental/cityscapes/custom_segmentation_eval.py b/experimental/cityscapes/custom_segmentation_eval.py index d0a08c662..3f65be885 100644 --- a/experimental/cityscapes/custom_segmentation_eval.py +++ b/experimental/cityscapes/custom_segmentation_eval.py @@ -6,7 +6,7 @@ """ import functools -from typing import Any, Callable, Dict, Tuple, Optional, Type +from typing import Any, Callable, Dict, Tuple, Optional, Type, Sequence import flax.linen as nn import jax @@ -85,6 +85,7 @@ def eval_step1( } (logits, _) = flax_model.apply( variables, batch['inputs'], train=False, mutable=False, debug=debug) + metrics = metrics_fn(logits, batch) confusion_matrix = get_confusion_matrix( @@ -96,9 +97,7 @@ def eval_step1( batch = jax.lax.all_gather(batch, 'batch') confusion_matrix = jax.lax.all_gather(confusion_matrix, 'batch') - logits = jax.lax.all_gather(logits, 'batch') - - return batch, logits, predictions, metrics, confusion_matrix + return batch, predictions, metrics, confusion_matrix def eval1( @@ -224,12 +223,11 @@ def eval1( #learning_rate_fn = lr_schedules.get_learning_rate_fn(config) ############### EVALUATION CODE ################# - eval_step_pmapped = jax.pmap( functools.partial( eval_step1, flax_model=model.flax_model, - metrics_fn=model.get_metrics_fn('validation'), + metrics_fn=model.get_metrics_fn_unc('validation'), debug=config.debug_eval), axis_name='batch', # We can donate the eval_batch's buffer. 
@@ -244,25 +242,6 @@ def eval1( #num_eval_examples = dataset.meta_data['num_eval_examples'] num_eval_examples = int(steps_per_eval * config.batch_size) - # TODO(kellbuchanan): add compatibility w gcp bucket - store_logits_fname = os.path.join(workdir, "logits", "val.h5py") - store_logits_fname = "logits/val.h5py" - #store_logits_fname = workdir + "/logits/val.h5py" - - #if not Path(store_logits_fname).parent.exists(): - # os.makedirs(str(Path(store_logits_fname).parent)) - #import pdb;pdb.set_trace() - # assert not os.path.isfile(store_logits_fname) - # with h5py.File(args.store_logits_fname, 'w', libver='latest', swmr=True) as f: - f = h5py.File(store_logits_fname, 'w', libver='latest') - # f.swmr_mode = True # single write multi-read - input_shape = dataset.meta_data['input_shape'][1:3] - num_classes = dataset.meta_data['num_classes'] - logits_out = f.create_dataset('logits', (num_eval_examples,) + input_shape + (num_classes,)) - #inputs_out = f.create_dataset('inputs', (num_eval_examples,) + input_shape + (3,)) - #labels_out = f.create_dataset('labels', (num_eval_examples,) + input_shape) - #predictions_out = f.create_dataset('predictions', (num_eval_examples,) + input_shape) - def evaluate(train_state: train_utils.TrainState, step: int) -> Dict[str, Any]: eval_metrics = [] @@ -276,34 +255,32 @@ def to_cpu(x): for step_ in range(steps_per_eval): eval_batch = next(dataset.valid_iter) e_batch, \ - e_logits, \ e_predictions, \ e_metrics, \ confusion_matrix = eval_step_pmapped(train_state=train_state, batch=eval_batch) eval_metrics.append(train_utils.unreplicate_and_get(e_metrics)) + eval_metrics.append(train_utils.unreplicate_and_get(e_metrics)) + # Evaluate global metrics on one of the hosts (lead_host), but given # intermediate values collected from all hosts. if lead_host and global_metrics_fn is not None: # Collect data to be sent for computing global metrics. 
eval_all_confusion_mats.append(to_cpu(confusion_matrix)) - #import pdb;pdb.set_trace() - # store logits, wbu batch_size - start_idx = step_ * batch_size - end_idx = start_idx + batch_size - #inputs_out[start_idx:end_idx] = to_cpu(e_batch)['inputs'] - #labels_out[start_idx:end_idx] = to_cpu(e_batch)['label'] - logits_out[start_idx:end_idx] = to_cpu(e_logits) - #predictions_out[start_idx:end_idx] = to_cpu(e_predictions) - eval_global_metrics_summary = {} if lead_host and global_metrics_fn is not None: eval_global_metrics_summary = global_metrics_fn(eval_all_confusion_mats, dataset.meta_data) - ############### LOG EVAL SUMMARY ############### + #eval_summary = train_utils.log_eval_summary( + eval_summary = log_eval_summary( + step=step, + eval_metrics=eval_metrics, + extra_eval_summary=eval_global_metrics_summary, + # writer=writer + ) """ eval_summary = train_utils.log_eval_summary( step=step, @@ -321,7 +298,7 @@ def to_cpu(x): writer.flush() """ - eval_summary = 0 + #eval_summary = 0 del eval_metrics return eval_summary @@ -423,8 +400,66 @@ def to_cpu(x): """ chrono.resume() # Un-pause now. - f.close() # Wait until computations are done before exiting. jax.random.normal(jax.random.PRNGKey(0), ()).block_until_ready() # Return the train and eval summary after last step for regresesion testing. return train_state, train_summary, eval_summary + + +def log_eval_summary(step: int, + eval_metrics: Sequence[Dict[str, Tuple[float, int]]], + extra_eval_summary: Optional[Dict[str, Any]] = None, + summary_writer: Optional[Any] = None, + metrics_normalizer_fn: Optional[ + Callable[[Dict[str, Tuple[float, int]], str], + Dict[str, float]]] = None, + prefix: str = 'valid', + key_separator: str = '_') -> Dict[str, float]: + """Computes and logs eval metrics. + + Args: + step: Current step. + eval_metrics: Sequence of dictionaries of calculated metrics. + extra_eval_summary: A dict containing summaries that are already ready to be + logged, e.g. 
global metrics from eval set, like precision/recall. + summary_writer: Summary writer object. + metrics_normalizer_fn: Used for normalizing metrics. The api for + this function is: `new_metrics_dict = metrics_normalizer_fn( metrics_dict, + split)`. If set to None, we use the normalize_metrics_summary which uses + the normalizer paired with each metric to normalize it. + prefix: str; Prefix added to the name of the summaries writen by this + function. + key_separator: Separator added between the prefix and key. + + Returns: + eval summary: A dictionary of metrics. + """ + eval_metrics = train_utils.stack_forest(eval_metrics) + + # Compute the sum over all examples in all batches. + eval_metrics_summary = jax.tree_map(lambda x: x.sum(), eval_metrics) + # Normalize metrics by the total number of exampels. + metrics_normalizer_fn = ( + metrics_normalizer_fn or train_utils.normalize_metrics_summary) + eval_metrics_summary = metrics_normalizer_fn(eval_metrics_summary, 'eval') + # If None, set to an empty dictionary. + extra_eval_summary = extra_eval_summary or {} + + if jax.process_index() == 0: + message = '' + for key, val in eval_metrics_summary.items(): + message += f'{key}: {val} | ' + for key, val in extra_eval_summary.items(): + message += f'{key}: {val} | ' + logging.info('step: %d -- %s -- {%s}', step, prefix, message) + + if summary_writer is not None: + for key, val in eval_metrics_summary.items(): + summary_writer.scalar(f'{prefix}{key_separator}{key}', val, step) + for key, val in extra_eval_summary.items(): + summary_writer.scalar(f'{prefix}{key_separator}{key}', val, step) + summary_writer.flush() + + # Add extra_eval_summary to the returned eval_summary. 
+ eval_metrics_summary.update(extra_eval_summary) + return eval_metrics_summary diff --git a/experimental/cityscapes/deterministic_eval.py b/experimental/cityscapes/deterministic_eval.py index 87c8a2dbd..2a5d5bc46 100644 --- a/experimental/cityscapes/deterministic_eval.py +++ b/experimental/cityscapes/deterministic_eval.py @@ -141,7 +141,7 @@ def main(config, output_dir): config=config, workdir=output_dir, writer=summary_writer) - print(train_summary) + print(eval_summary) return diff --git a/experimental/cityscapes/uncertainty_metrics.py b/experimental/cityscapes/uncertainty_metrics.py new file mode 100644 index 000000000..c679221d9 --- /dev/null +++ b/experimental/cityscapes/uncertainty_metrics.py @@ -0,0 +1,97 @@ +""" +Include uncertainty metrics +""" +import jax.numpy as jnp +from typing import Optional, Any, Tuple, Union +from scenic.model_lib.base_models.model_utils import apply_weights + +from jax import lax + + +def calculate_pavpu( + labels: jnp.ndarray, + logits: jnp.ndarray, + weights: Optional[jnp.ndarray] = None, + accuracy_th: Optional[float] = 0.5, + uncertainty_th: Optional[float] = 0.5, + window_size: Optional[int] = 2) -> jnp.ndarray: + """ + Calculate PavPu + """ + if labels.ndim == logits.ndim: # One-hot targets. 
+ targets = jnp.argmax(labels, axis=-1) + else: + targets = labels + + preds = jnp.argmax(logits, axis=-1) + + # calculate binary accuracy map + correct = jnp.equal(preds, targets) + + if weights is not None: + correct = apply_weights(correct, weights) + + correct = correct.astype(jnp.float32) + + binary_acc_map = binarize_map(correct,window_size,accuracy_th) + + # calculate uncertainty map + entropy = jnp.sum(logits*jnp.log(logits), axis=-1).astype(jnp.float32) + + binary_unc_map = binarize_map(entropy, window_size, uncertainty_th) + + # umber of patches that are accurate and certain + n_ac = jnp.sum(jnp.logical_and(binary_acc_map, binary_unc_map)) + + # number of patches that are inaccurate and certain + n_ic = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 0), + jnp.equal(binary_unc_map, 1)) + ) + # number of patches that are inaccurate and uncertain + n_iu = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 0), + jnp.equal(binary_unc_map, 0)) + ) + + # number of patches that are accurate and uncertain + n_au = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 1), + jnp.equal(binary_unc_map, 0)) + ) + + # p_accurate_certain = n_ac / (n_ac + n_ic) + # p_uncertain_inaccurate = n_iu / (n_ic + n_iu) + + # Patch accuracy vs Patch uncertainty + pavpu = (n_ac + n_iu) / (n_ac + n_au + n_ic + n_iu) + + return pavpu + + +def binarize_map( + array_map: jnp.ndarray, + window_size: Optional[int] = 2, + threshold:Optional[float] = 0.5, + ) -> jnp.ndarray: + """ + Given a map, apply a 2d spatial strided convolution to avg adjacent values + """ + # expand dims if necessary + if array_map.ndim == 3: + array_map = jnp.expand_dims(array_map, 0) + + # create a kernel + kernel = jnp.ones(array_map.shape[:-2] + (window_size, window_size)) + + # Convolve map with kernel + out = lax.conv(array_map, # lhs = NCHW image tensor + kernel, # rhs = OIHW conv kernel tensor + (window_size, window_size), # window strides + 'SAME') # padding mode + + # divide by window_size + out = 
jnp.divide(out, window_size*window_size) + + # binarize_map according to threshold + binary_map = jnp.greater_equal(out, threshold) + + return binary_map.astype(jnp.int32) + From 1895cd3e4d6fb0a06d602efb17c680c0eb3871eb Mon Sep 17 00:00:00 2001 From: ekellbuch Date: Tue, 11 Jan 2022 07:44:26 +0000 Subject: [PATCH 038/150] add code to store metrics for all runs --- .../cityscapes/custom_segmentation_eval.py | 2 +- .../cityscapes/deterministic_eval_gcp.py | 174 ++++++++++++++++++ .../cityscapes/run_deterministic_eval.sh | 23 ++- 3 files changed, 193 insertions(+), 6 deletions(-) create mode 100644 experimental/cityscapes/deterministic_eval_gcp.py diff --git a/experimental/cityscapes/custom_segmentation_eval.py b/experimental/cityscapes/custom_segmentation_eval.py index 3f65be885..63e1f2e99 100644 --- a/experimental/cityscapes/custom_segmentation_eval.py +++ b/experimental/cityscapes/custom_segmentation_eval.py @@ -335,7 +335,7 @@ def to_cpu(x): step0_log['gflops'] = gflops writer.write_scalars(1, step0_log) - for step in range(start_step + 1, total_steps + 2): + for step in range(start_step + 1, start_step + 2): with jax.profiler.StepTraceContext('train', sfLtep_num=step): train_batch = next(dataset.train_iter) diff --git a/experimental/cityscapes/deterministic_eval_gcp.py b/experimental/cityscapes/deterministic_eval_gcp.py new file mode 100644 index 000000000..a968ca1c1 --- /dev/null +++ b/experimental/cityscapes/deterministic_eval_gcp.py @@ -0,0 +1,174 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Eval vit model on cityscapes. + +Step 1: aim to train model on cityscapes for 1 step +# Runs with + +""" + +import os +import sys + +# %% +import jax +# %% +import tensorflow as tf +# %% +from absl import app +from absl import flags +from absl import logging +from ml_collections.config_flags import config_flags +from tensorflow.io import gfile + +import custom_models +import custom_segmentation_eval +# scenic dependencies for debugging +from scenic.train_lib import train_utils + +# import train_utils # local file import +import pandas as pd + +#%% +config_flags.DEFINE_config_file( + 'config', None, 'Training configuration.', lock_config=True) +flags.DEFINE_string('output_dir', default=None, help='Work unit directory.') +flags.DEFINE_integer( + 'num_cores', default=None, help='Unused. How many devices being used.') +flags.DEFINE_boolean( + 'use_gpu', default=None, help='Unused. Whether or not running on GPU.') +flags.DEFINE_string('tpu', None, + 'Unused. Name of the TPU. Only used if use_gpu is False.') +flags.DEFINE_string('dataset_service_address', None, + 'Address of the tf.data service') +FLAGS = flags.FLAGS + + +def write_note(note): + if jax.process_index() == 0: + logging.info('NOTE: %s', note) + + +from clu import metric_writers + + +def run(config, workdir): + """Prepares model, and dataset for training. + + This creates summary directories, summary writers, model definition, and + builds datasets to be sent to the main training script. + + Args: + config: ConfigDict; Hyper parameters. + workdir: string; Root directory for the experiment. + + Returns: + The outputs of trainer.train(), which are train_state, train_summary, and + eval_summary. 
+ """ + lead_host = jax.process_index() == 0 + # set up the train_dir and log_dir + gfile.makedirs(workdir) + #workdir = os.path.join(workdir, 'trial') + #gfile.makedirs(workdir) + + summary_writer = None + if lead_host and config.write_summary: + tensorboard_dir = os.path.join(workdir, 'tb_summaries') + gfile.makedirs(tensorboard_dir) + # summary_writer = tensorboard.SummaryWriter(tensorboard_dir) + summary_writer = metric_writers.SummaryWriter(tensorboard_dir) + + device_count = jax.device_count() + logging.info('device_count: %d', device_count) + logging.info('num_hosts : %d', jax.process_count()) + logging.info('host_id : %d', jax.process_index()) + + rng = jax.random.PRNGKey(config.rng_seed) + logging.info('rng: %s', rng) + + model_cls = custom_models.SegmenterSegmentationModel + + # ---------------------- + # Load dataset + # ---------------------- + data_rng, rng = jax.random.split(rng) + # set resource limit to debug in mac osx (see https://github.com/tensorflow/datasets/issues/1441) + if jax.process_index() == 0 and sys.platform == 'darwin': + import resource + low, high = resource.getrlimit(resource.RLIMIT_NOFILE) + resource.setrlimit(resource.RLIMIT_NOFILE, (high, high)) + write_note('Loading dataset...') + + # TODO: update num_classes + dataset = train_utils.get_dataset( + config, data_rng, dataset_service_address=FLAGS.dataset_service_address) + + return rng, model_cls, dataset, config, workdir, summary_writer + + +def main(config, output_dir): + + print('config') + print(config) + seed = config.get('rng_seed', 0) + rng = jax.random.PRNGKey(seed) + tf.random.set_seed(seed) + + print('workdir ', output_dir) + rng, model_cls, dataset, config, workdir, summary_writer = run(config, output_dir) + print('workdir ', workdir) + + # ---------------------- + # Eval function + # ---------------------- + eval_fn = custom_segmentation_eval.eval1 + + # models + for rng_seed in [0,1,2,3,4]: + for train_mode in ["deterministic","scratch","gp"]: + for train_split in 
[100,75, 50, 25, 10]: + run_name="{}_split{}_seed{}".format(train_mode, train_split, rng_seed) + tmp_workdir="gs://ub-ekb/segmenter/cityscapes/run_splits1/checkpoints/{}".format(run_name) + print("temp directory", tmp_workdir) + tmp_resultsdir="results/metrics/{}.csv".format(run_name) + #import pdb; pdb.set_trace(); + train_state, train_summary, eval_summary = eval_fn(rng=rng, model_cls=model_cls, dataset=dataset, + config=config, + workdir=tmp_workdir, writer=summary_writer) + print(eval_summary) + #import pdb;pdb.set_trace() + df = pd.DataFrame([eval_summary]) + df.to_csv (r'{}'.format(tmp_resultsdir), index = False, header=True) + + return + + +if __name__ == '__main__': + # Adds jax flags to the program. + jax.config.config_with_absl() + + # TODO(dusenberrymw): Refactor `main` such that there is a `train_eval` + # function that returns values for tests and does not directly access flags, + # and then have `main` return None. + + def _main(unused_argv): + config = FLAGS.config + output_dir = FLAGS.output_dir + main(config, output_dir) + + app.run(_main) # Ignore the returned values from `main`. 
diff --git a/experimental/cityscapes/run_deterministic_eval.sh b/experimental/cityscapes/run_deterministic_eval.sh index 3e7512313..c973ebd93 100755 --- a/experimental/cityscapes/run_deterministic_eval.sh +++ b/experimental/cityscapes/run_deterministic_eval.sh @@ -1,8 +1,14 @@ -#!/bin/sh +#!/bin/bash # train cityscapes using segmenter with pretrained backbone # supports 2 options to +function get_config() +{ + local config_file_name="experiments/splits/imagenet21k_segmenter_cityscapes_$1_$2.py" + echo "$config_file_name" +} + if [ "$(uname)" = "Darwin" ] ; then # Do something under Mac OS X platform config_file='experiments/imagenet21k_segmenter_cityscapes1.py' @@ -17,17 +23,24 @@ if [ "$(uname)" = "Darwin" ] ; then # --tpu=$tpu elif [ "$(uname)" = "Linux" ]; then echo "in Linux" - config_file='experiments/imagenet21k_segmenter_cityscapes13.py' - output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs13" + train_mode="scratch" + train_split=100 + rng_seed=0 + config_file=$(get_config $train_mode $train_split) + run_name="${train_mode}_split${train_split}_seed${rng_seed}" + #config_file='experiments/imagenet21k_segmenter_cityscapes13.py' + #output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs13" + output_dir="gs://ub-ekb/segmenter/cityscapes/run_splits1/checkpoints/${run_name}" num_cores=8 tpu='local' use_gpu=False - python3 deterministic.py --output_dir=${output_dir} \ + python3 deterministic_eval_gcp.py --output_dir=${output_dir} \ --num_cores=$num_cores \ --use_gpu=$use_gpu \ --config=${config_file} \ - --config.batch_size=${batch_size} \ --tpu=$tpu +# --config.batch_size=${batch_size} \ + fi #%% From 6e0eb0e6e5be10377de5505c20a7ef9279774b97 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 11 Jan 2022 13:26:35 -0500 Subject: [PATCH 039/150] fix bug in uncertainty calculate --- experimental/cityscapes/custom_models.py | 8 +- .../cityscapes/uncertainty_metrics.py | 135 +++++++++++++++++- 2 files changed, 138 insertions(+), 5 
deletions(-) diff --git a/experimental/cityscapes/custom_models.py b/experimental/cityscapes/custom_models.py index 63015c3fe..ac4c07df5 100644 --- a/experimental/cityscapes/custom_models.py +++ b/experimental/cityscapes/custom_models.py @@ -24,7 +24,7 @@ PyTree = Union[Mapping[str, Mapping], Any] import functools -from uncertainty_metrics import calculate_pavpu +from uncertainty_metrics import calculate_pavpu, calculate_puncert_inacc, calculate_pacc_cert # Standard default metrics for the semantic segmentation models. _SEMANTIC_SEGMENTATION_METRICS_UNC = immutabledict({ @@ -36,6 +36,12 @@ # The pavpu is already normalized, so we set num_pixels to 1.0: 'pavpu': (calculate_pavpu, lambda *a, **kw: 1.0), + # The pavpu is already normalized, so we set num_pixels to 1.0: + 'puncert_inacc': (calculate_puncert_inacc, lambda *a, **kw: 1.0), + + # The pavpu is already normalized, so we set num_pixels to 1.0: + 'pacc_cert': (calculate_pacc_cert, lambda *a, **kw: 1.0), + }) diff --git a/experimental/cityscapes/uncertainty_metrics.py b/experimental/cityscapes/uncertainty_metrics.py index c679221d9..2dbddbb40 100644 --- a/experimental/cityscapes/uncertainty_metrics.py +++ b/experimental/cityscapes/uncertainty_metrics.py @@ -7,6 +7,130 @@ from jax import lax +# TODO(kellybuchanan): consolidate metric calculation + + +def calculate_puncert_inacc( + labels: jnp.ndarray, + logits: jnp.ndarray, + weights: Optional[jnp.ndarray] = None, + accuracy_th: Optional[float] = 0.5, + uncertainty_th: Optional[float] = 0.5, + window_size: Optional[int] = 2) -> jnp.ndarray: + """ + Calculate PavPu + TODO(kellybuchana): include weights for entropy calculation + """ + if labels.ndim == logits.ndim: # One-hot targets. 
+ targets = jnp.argmax(labels, axis=-1) + else: + targets = labels + + preds = jnp.argmax(logits, axis=-1) + + # calculate binary accuracy map + correct = jnp.equal(preds, targets) + + # batch masking + if weights is not None: + correct = apply_weights(correct, weights) + + correct = correct.astype(jnp.float32) + + binary_acc_map = binarize_map(correct, window_size, accuracy_th) + + # Calculate uncertainty map + probs = jnp.exp(logits)/ jnp.sum(jnp.exp(logits),-1, keepdims=True) + entropy = -jnp.sum(probs*jnp.log(probs), axis=-1).astype(jnp.float32) + + binary_unc_map = binarize_map(entropy, window_size, uncertainty_th) + + # umber of patches that are accurate and certain + n_ac = jnp.sum(jnp.logical_and(binary_acc_map, binary_unc_map)) + + # number of patches that are inaccurate and certain + n_ic = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 0), + jnp.equal(binary_unc_map, 1)) + ) + # number of patches that are inaccurate and uncertain + n_iu = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 0), + jnp.equal(binary_unc_map, 0)) + ) + + # number of patches that are accurate and uncertain + n_au = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 1), + jnp.equal(binary_unc_map, 0)) + ) + + p_accurate_certain = n_ac / (n_ac + n_ic) + p_uncertain_inaccurate = n_iu / (n_ic + n_iu) + + # Patch accuracy vs Patch uncertainty + pavpu = (n_ac + n_iu) / (n_ac + n_au + n_ic + n_iu) + + return p_uncertain_inaccurate + + +def calculate_pacc_cert( + labels: jnp.ndarray, + logits: jnp.ndarray, + weights: Optional[jnp.ndarray] = None, + accuracy_th: Optional[float] = 0.5, + uncertainty_th: Optional[float] = 0.5, + window_size: Optional[int] = 2) -> jnp.ndarray: + """ + Calculate PavPu + TODO(kellybuchana): include weights for entropy calculation + """ + if labels.ndim == logits.ndim: # One-hot targets. 
+ targets = jnp.argmax(labels, axis=-1) + else: + targets = labels + + preds = jnp.argmax(logits, axis=-1) + + # calculate binary accuracy map + correct = jnp.equal(preds, targets) + + # batch masking + if weights is not None: + correct = apply_weights(correct, weights) + + correct = correct.astype(jnp.float32) + + binary_acc_map = binarize_map(correct, window_size, accuracy_th) + + # Calculate uncertainty map + probs = jnp.exp(logits)/ jnp.sum(jnp.exp(logits),-1, keepdims=True) + entropy = -jnp.sum(probs*jnp.log(probs), axis=-1).astype(jnp.float32) + + binary_unc_map = binarize_map(entropy, window_size, uncertainty_th) + + # umber of patches that are accurate and certain + n_ac = jnp.sum(jnp.logical_and(binary_acc_map, binary_unc_map)) + + # number of patches that are inaccurate and certain + n_ic = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 0), + jnp.equal(binary_unc_map, 1)) + ) + # number of patches that are inaccurate and uncertain + n_iu = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 0), + jnp.equal(binary_unc_map, 0)) + ) + + # number of patches that are accurate and uncertain + n_au = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 1), + jnp.equal(binary_unc_map, 0)) + ) + + p_accurate_certain = n_ac / (n_ac + n_ic) + # p_uncertain_inaccurate = n_iu / (n_ic + n_iu) + + # Patch accuracy vs Patch uncertainty + pavpu = (n_ac + n_iu) / (n_ac + n_au + n_ic + n_iu) + + return p_accurate_certain + def calculate_pavpu( labels: jnp.ndarray, @@ -17,6 +141,7 @@ def calculate_pavpu( window_size: Optional[int] = 2) -> jnp.ndarray: """ Calculate PavPu + TODO(kellybuchana): include weights for entropy calculation """ if labels.ndim == logits.ndim: # One-hot targets. 
targets = jnp.argmax(labels, axis=-1) @@ -28,15 +153,17 @@ def calculate_pavpu( # calculate binary accuracy map correct = jnp.equal(preds, targets) + # batch masking if weights is not None: correct = apply_weights(correct, weights) correct = correct.astype(jnp.float32) - binary_acc_map = binarize_map(correct,window_size,accuracy_th) + binary_acc_map = binarize_map(correct, window_size, accuracy_th) - # calculate uncertainty map - entropy = jnp.sum(logits*jnp.log(logits), axis=-1).astype(jnp.float32) + # Calculate uncertainty map + probs = jnp.exp(logits)/ jnp.sum(jnp.exp(logits),-1, keepdims=True) + entropy = -jnp.sum(probs*jnp.log(probs), axis=-1).astype(jnp.float32) binary_unc_map = binarize_map(entropy, window_size, uncertainty_th) @@ -68,7 +195,7 @@ def calculate_pavpu( def binarize_map( array_map: jnp.ndarray, - window_size: Optional[int] = 2, + window_size: Optional[int] = 4, threshold:Optional[float] = 0.5, ) -> jnp.ndarray: """ From e328239db2aacabae1ded94b4c4c0560a4a8044e Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 17 Jan 2022 20:43:52 -0500 Subject: [PATCH 040/150] update custom models --- experimental/cityscapes/custom_models.py | 3 +- .../cityscapes/uncertainty_metrics.py | 226 ++++++++---------- 2 files changed, 100 insertions(+), 129 deletions(-) diff --git a/experimental/cityscapes/custom_models.py b/experimental/cityscapes/custom_models.py index ac4c07df5..6e6fabe95 100644 --- a/experimental/cityscapes/custom_models.py +++ b/experimental/cityscapes/custom_models.py @@ -24,7 +24,8 @@ PyTree = Union[Mapping[str, Mapping], Any] import functools -from uncertainty_metrics import calculate_pavpu, calculate_puncert_inacc, calculate_pacc_cert +from uncertainty_metrics import calculate_pavpu, calculate_puncert_inacc, \ + calculate_pacc_cert # Standard default metrics for the semantic segmentation models. 
_SEMANTIC_SEGMENTATION_METRICS_UNC = immutabledict({ diff --git a/experimental/cityscapes/uncertainty_metrics.py b/experimental/cityscapes/uncertainty_metrics.py index 2dbddbb40..a5f7b84d3 100644 --- a/experimental/cityscapes/uncertainty_metrics.py +++ b/experimental/cityscapes/uncertainty_metrics.py @@ -3,85 +3,56 @@ """ import jax.numpy as jnp from typing import Optional, Any, Tuple, Union + +import numpy as np + from scenic.model_lib.base_models.model_utils import apply_weights from jax import lax -# TODO(kellybuchanan): consolidate metric calculation +# TODO(kellybuchanan): consolidate metric calculation as class -def calculate_puncert_inacc( - labels: jnp.ndarray, - logits: jnp.ndarray, - weights: Optional[jnp.ndarray] = None, - accuracy_th: Optional[float] = 0.5, - uncertainty_th: Optional[float] = 0.5, - window_size: Optional[int] = 2) -> jnp.ndarray: +def calculate_num_patches_binary_maps( + binary_acc_map: jnp.ndarray, + binary_unc_map: jnp.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """ - Calculate PavPu - TODO(kellybuchana): include weights for entropy calculation + Calculate conditional probabilities in confusion matrix given binary + accuracy and uncertainty maps """ - if labels.ndim == logits.ndim: # One-hot targets. 
- targets = jnp.argmax(labels, axis=-1) - else: - targets = labels - - preds = jnp.argmax(logits, axis=-1) - - # calculate binary accuracy map - correct = jnp.equal(preds, targets) - - # batch masking - if weights is not None: - correct = apply_weights(correct, weights) - - correct = correct.astype(jnp.float32) - - binary_acc_map = binarize_map(correct, window_size, accuracy_th) - - # Calculate uncertainty map - probs = jnp.exp(logits)/ jnp.sum(jnp.exp(logits),-1, keepdims=True) - entropy = -jnp.sum(probs*jnp.log(probs), axis=-1).astype(jnp.float32) - - binary_unc_map = binarize_map(entropy, window_size, uncertainty_th) - - # umber of patches that are accurate and certain - n_ac = jnp.sum(jnp.logical_and(binary_acc_map, binary_unc_map)) + # number of patches that are accurate and certain + n_ac = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 1), + jnp.equal(binary_unc_map, 0)), axis=(-1, -2)) # number of patches that are inaccurate and certain n_ic = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 0), - jnp.equal(binary_unc_map, 1)) + jnp.equal(binary_unc_map, 0)), axis=(-1, -2) ) # number of patches that are inaccurate and uncertain n_iu = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 0), - jnp.equal(binary_unc_map, 0)) + jnp.equal(binary_unc_map, 1)), axis=(-1, -2) ) # number of patches that are accurate and uncertain n_au = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 1), - jnp.equal(binary_unc_map, 0)) + jnp.equal(binary_unc_map, 1)), axis=(-1, -2) ) - p_accurate_certain = n_ac / (n_ac + n_ic) - p_uncertain_inaccurate = n_iu / (n_ic + n_iu) - - # Patch accuracy vs Patch uncertainty - pavpu = (n_ac + n_iu) / (n_ac + n_au + n_ic + n_iu) - - return p_uncertain_inaccurate + return n_ac, n_ic, n_iu, n_au -def calculate_pacc_cert( - labels: jnp.ndarray, - logits: jnp.ndarray, - weights: Optional[jnp.ndarray] = None, - accuracy_th: Optional[float] = 0.5, - uncertainty_th: Optional[float] = 0.5, - window_size: Optional[int] = 2) -> jnp.ndarray: +def 
calculate_uncertainty_confusion_matrix( + logits: jnp.ndarray, + labels: jnp.ndarray, + weights: Optional[jnp.ndarray] = None, + accuracy_th: Optional[float] = 0.5, + uncertainty_th: Optional[float] = 0.5, + window_size: Optional[int] = 2) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """ - Calculate PavPu - TODO(kellybuchana): include weights for entropy calculation + Calculate conditional probabilities + TODO(kellybuchanan): include weights for entropy calculation """ + # --- if labels.ndim == logits.ndim: # One-hot targets. targets = jnp.argmax(labels, axis=-1) else: @@ -98,94 +69,88 @@ def calculate_pacc_cert( correct = correct.astype(jnp.float32) - binary_acc_map = binarize_map(correct, window_size, accuracy_th) + # A given patch is accurate if its acc > accuracy_threshold + binary_acc_map = reduce_2dmap(correct, window_size, accuracy_th).astype(jnp.float32) # Calculate uncertainty map - probs = jnp.exp(logits)/ jnp.sum(jnp.exp(logits),-1, keepdims=True) + probs = jnp.exp(logits) / jnp.sum(jnp.exp(logits), -1, keepdims=True) entropy = -jnp.sum(probs*jnp.log(probs), axis=-1).astype(jnp.float32) - binary_unc_map = binarize_map(entropy, window_size, uncertainty_th) - - # umber of patches that are accurate and certain - n_ac = jnp.sum(jnp.logical_and(binary_acc_map, binary_unc_map)) - - # number of patches that are inaccurate and certain - n_ic = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 0), - jnp.equal(binary_unc_map, 1)) - ) - # number of patches that are inaccurate and uncertain - n_iu = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 0), - jnp.equal(binary_unc_map, 0)) - ) - - # number of patches that are accurate and uncertain - n_au = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 1), - jnp.equal(binary_unc_map, 0)) - ) - - p_accurate_certain = n_ac / (n_ac + n_ic) - # p_uncertain_inaccurate = n_iu / (n_ic + n_iu) + # A given patch is uncertain if its uncertainty > uncertainty_th + binary_unc_map = reduce_2dmap(entropy, window_size, 
uncertainty_th).astype(jnp.float32) - # Patch accuracy vs Patch uncertainty - pavpu = (n_ac + n_iu) / (n_ac + n_au + n_ic + n_iu) + # number of patches that are accurate and certain + n_ac, n_ic, n_iu, n_au = calculate_num_patches_binary_maps( + binary_acc_map, binary_unc_map) - return p_accurate_certain + return n_ac, n_ic, n_iu, n_au -def calculate_pavpu( - labels: jnp.ndarray, +def calculate_puncert_inacc( logits: jnp.ndarray, + labels: jnp.ndarray, weights: Optional[jnp.ndarray] = None, accuracy_th: Optional[float] = 0.5, - uncertainty_th: Optional[float] = 0.5, + uncertainty_th: Optional[float] = 0.4, window_size: Optional[int] = 2) -> jnp.ndarray: """ - Calculate PavPu - TODO(kellybuchana): include weights for entropy calculation + Calculate p(uncertain | inaccurate) """ - if labels.ndim == logits.ndim: # One-hot targets. - targets = jnp.argmax(labels, axis=-1) - else: - targets = labels - preds = jnp.argmax(logits, axis=-1) + n_ac, n_ic, n_iu, n_au = calculate_uncertainty_confusion_matrix( + logits=logits, + labels=labels, + weights=weights, + accuracy_th=accuracy_th, + uncertainty_th=uncertainty_th, + window_size=window_size) - # calculate binary accuracy map - correct = jnp.equal(preds, targets) - - # batch masking - if weights is not None: - correct = apply_weights(correct, weights) - - correct = correct.astype(jnp.float32) + p_uncertain_inaccurate = n_iu / (n_ic + n_iu) - binary_acc_map = binarize_map(correct, window_size, accuracy_th) + return p_uncertain_inaccurate - # Calculate uncertainty map - probs = jnp.exp(logits)/ jnp.sum(jnp.exp(logits),-1, keepdims=True) - entropy = -jnp.sum(probs*jnp.log(probs), axis=-1).astype(jnp.float32) - binary_unc_map = binarize_map(entropy, window_size, uncertainty_th) +def calculate_pacc_cert( + logits: jnp.ndarray, + labels: jnp.ndarray, + weights: Optional[jnp.ndarray] = None, + accuracy_th: Optional[float] = 0.5, + uncertainty_th: Optional[float] = 0.4, + window_size: Optional[int] = 2) -> jnp.ndarray: + """ + 
Calculate p(accurate|certain) + """ + # TODO(kellybuchanan): reconcile cases where there are no certain patches. - # umber of patches that are accurate and certain - n_ac = jnp.sum(jnp.logical_and(binary_acc_map, binary_unc_map)) + n_ac, n_ic, n_iu, n_au = calculate_uncertainty_confusion_matrix( + logits=logits, + labels=labels, + weights=weights, + accuracy_th=accuracy_th, + uncertainty_th=uncertainty_th, + window_size=window_size) - # number of patches that are inaccurate and certain - n_ic = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 0), - jnp.equal(binary_unc_map, 1)) - ) - # number of patches that are inaccurate and uncertain - n_iu = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 0), - jnp.equal(binary_unc_map, 0)) - ) + p_accurate_certain = n_ac / (n_ac + n_ic) + return p_accurate_certain - # number of patches that are accurate and uncertain - n_au = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 1), - jnp.equal(binary_unc_map, 0)) - ) - # p_accurate_certain = n_ac / (n_ac + n_ic) - # p_uncertain_inaccurate = n_iu / (n_ic + n_iu) +def calculate_pavpu( + logits: jnp.ndarray, + labels: jnp.ndarray, + weights: Optional[jnp.ndarray] = None, + accuracy_th: Optional[float] = 0.5, + uncertainty_th: Optional[float] = 0.4, + window_size: Optional[int] = 2) -> jnp.ndarray: + """ + Calculate PavPu + """ + n_ac, n_ic, n_iu, n_au = calculate_uncertainty_confusion_matrix( + logits=logits, + labels=labels, + weights=weights, + accuracy_th=accuracy_th, + uncertainty_th=uncertainty_th, + window_size=window_size) # Patch accuracy vs Patch uncertainty pavpu = (n_ac + n_iu) / (n_ac + n_au + n_ic + n_iu) @@ -193,24 +158,27 @@ def calculate_pavpu( return pavpu -def binarize_map( +def reduce_2dmap( array_map: jnp.ndarray, - window_size: Optional[int] = 4, - threshold:Optional[float] = 0.5, + window_size: int = 4, + threshold: float = 0.5, ) -> jnp.ndarray: """ Given a map, apply a 2d spatial strided convolution to avg adjacent values """ - # expand dims if necessary + 
reduce_dims = 0 + + # Expand dims if necessary if array_map.ndim == 3: array_map = jnp.expand_dims(array_map, 0) + reduce_dims = 1 - # create a kernel + # Create a kernel kernel = jnp.ones(array_map.shape[:-2] + (window_size, window_size)) # Convolve map with kernel out = lax.conv(array_map, # lhs = NCHW image tensor - kernel, # rhs = OIHW conv kernel tensor + kernel, # rhs = OIHW conv kernel tensor (window_size, window_size), # window strides 'SAME') # padding mode @@ -218,7 +186,9 @@ def binarize_map( out = jnp.divide(out, window_size*window_size) # binarize_map according to threshold - binary_map = jnp.greater_equal(out, threshold) + binary_map = jnp.greater(out, threshold) - return binary_map.astype(jnp.int32) + if reduce_dims: + binary_map = jnp.squeeze(binary_map, 0) + return binary_map.astype(jnp.int32) From b02d0f18c3b0f1d982b76b5c8cc45d6fcb7a6467 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 18 Jan 2022 02:16:33 -0500 Subject: [PATCH 041/150] ignore cases where mask is 0, still need to find a nice workaround --- .../cityscapes/custom_segmentation_eval.py | 1 - .../cityscapes/uncertainty_metrics.py | 61 +++++++++++-------- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/experimental/cityscapes/custom_segmentation_eval.py b/experimental/cityscapes/custom_segmentation_eval.py index 63e1f2e99..c7eb63b6c 100644 --- a/experimental/cityscapes/custom_segmentation_eval.py +++ b/experimental/cityscapes/custom_segmentation_eval.py @@ -259,7 +259,6 @@ def to_cpu(x): e_metrics, \ confusion_matrix = eval_step_pmapped(train_state=train_state, batch=eval_batch) - eval_metrics.append(train_utils.unreplicate_and_get(e_metrics)) eval_metrics.append(train_utils.unreplicate_and_get(e_metrics)) # Evaluate global metrics on one of the hosts (lead_host), but given diff --git a/experimental/cityscapes/uncertainty_metrics.py b/experimental/cityscapes/uncertainty_metrics.py index a5f7b84d3..47469afa5 100644 --- 
a/experimental/cityscapes/uncertainty_metrics.py +++ b/experimental/cityscapes/uncertainty_metrics.py @@ -11,6 +11,7 @@ from jax import lax # TODO(kellybuchanan): consolidate metric calculation as class +# TODO(kellybuchanan): support cases where mask is 0 def calculate_num_patches_binary_maps( @@ -22,7 +23,8 @@ def calculate_num_patches_binary_maps( """ # number of patches that are accurate and certain n_ac = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 1), - jnp.equal(binary_unc_map, 0)), axis=(-1, -2)) + jnp.equal(binary_unc_map, 0)), axis=(-1, -2) + ) # number of patches that are inaccurate and certain n_ic = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 0), @@ -87,12 +89,12 @@ def calculate_uncertainty_confusion_matrix( def calculate_puncert_inacc( - logits: jnp.ndarray, - labels: jnp.ndarray, - weights: Optional[jnp.ndarray] = None, - accuracy_th: Optional[float] = 0.5, - uncertainty_th: Optional[float] = 0.4, - window_size: Optional[int] = 2) -> jnp.ndarray: + logits: jnp.ndarray, + labels: jnp.ndarray, + weights: Optional[jnp.ndarray] = None, + accuracy_th: Optional[float] = 0.5, + uncertainty_th: Optional[float] = 0.4, + window_size: Optional[int] = 2) -> jnp.ndarray: """ Calculate p(uncertain | inaccurate) """ @@ -105,18 +107,22 @@ def calculate_puncert_inacc( uncertainty_th=uncertainty_th, window_size=window_size) - p_uncertain_inaccurate = n_iu / (n_ic + n_iu) + # p(uncertain | innacurate) + p_tmp = n_iu / (n_ic + n_iu) - return p_uncertain_inaccurate + # TODO: ignore cases where mask is 0 + p_tmp = jnp.nan_to_num(p_tmp) + + return p_tmp def calculate_pacc_cert( - logits: jnp.ndarray, - labels: jnp.ndarray, - weights: Optional[jnp.ndarray] = None, - accuracy_th: Optional[float] = 0.5, - uncertainty_th: Optional[float] = 0.4, - window_size: Optional[int] = 2) -> jnp.ndarray: + logits: jnp.ndarray, + labels: jnp.ndarray, + weights: Optional[jnp.ndarray] = None, + accuracy_th: Optional[float] = 0.5, + uncertainty_th: Optional[float] = 0.4, + 
window_size: Optional[int] = 2) -> jnp.ndarray: """ Calculate p(accurate|certain) """ @@ -130,17 +136,20 @@ def calculate_pacc_cert( uncertainty_th=uncertainty_th, window_size=window_size) - p_accurate_certain = n_ac / (n_ac + n_ic) - return p_accurate_certain + p_tmp = n_ac / (n_ac + n_ic) + + # TODO: ignore cases where mask is 0 + p_tmp = jnp.nan_to_num(p_tmp) + return p_tmp def calculate_pavpu( - logits: jnp.ndarray, - labels: jnp.ndarray, - weights: Optional[jnp.ndarray] = None, - accuracy_th: Optional[float] = 0.5, - uncertainty_th: Optional[float] = 0.4, - window_size: Optional[int] = 2) -> jnp.ndarray: + logits: jnp.ndarray, + labels: jnp.ndarray, + weights: Optional[jnp.ndarray] = None, + accuracy_th: Optional[float] = 0.5, + uncertainty_th: Optional[float] = 0.4, + window_size: Optional[int] = 2) -> jnp.ndarray: """ Calculate PavPu """ @@ -153,9 +162,11 @@ def calculate_pavpu( window_size=window_size) # Patch accuracy vs Patch uncertainty - pavpu = (n_ac + n_iu) / (n_ac + n_au + n_ic + n_iu) + p_tmp = (n_ac + n_iu) / (n_ac + n_au + n_ic + n_iu) - return pavpu + # TODO: ignore cases where mask is 0 + p_tmp = jnp.nan_to_num(p_tmp) + return p_tmp def reduce_2dmap( From a49b80ed609f72fd342aa4efccac4b4f33eed077 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 31 Jan 2022 16:27:01 -0500 Subject: [PATCH 042/150] add comment to pretrainer utils download which has config inputs not as variables but hardcoded --- experimental/cityscapes/README.md | 15 +++++++++------ experimental/cityscapes/pretrainer_utils.py | 3 ++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/experimental/cityscapes/README.md b/experimental/cityscapes/README.md index 8e1e33393..eb2e1d4ea 100644 --- a/experimental/cityscapes/README.md +++ b/experimental/cityscapes/README.md @@ -1,10 +1,13 @@ # Log -[x] add cityscapes dataset loader. -[x] add deterministic training for segmenter. -[x] include transfer learning option: init from pretrained backbone. 
-[x] include option to train vit+ model using different train split. -[x] add pavpu metric -[x] calculate uncertainty metrics +[x] add cityscapes dataset loader.
+[x] add deterministic training for segmenter.
+[x] include transfer learning option: init from pretrained backbone.
+[x] include option to train vit+ model using different train split.
+[x] add pavpu metric.
+[x] calculate uncertainty metrics.
+ +[] Add run with vit l-32 backbone.
+ diff --git a/experimental/cityscapes/pretrainer_utils.py b/experimental/cityscapes/pretrainer_utils.py index 35899ce6a..9b2958002 100644 --- a/experimental/cityscapes/pretrainer_utils.py +++ b/experimental/cityscapes/pretrainer_utils.py @@ -31,6 +31,8 @@ def load_bb_config(config): Returns: restored_model_cfg: mock model config """ + #TODO(kellybuchanan): calculate grid given config + restored_model_cfg = ml_collections.ConfigDict() restored_model_cfg.patches = ml_collections.ConfigDict() restored_model_cfg.patches.size = [16, 16] @@ -38,7 +40,6 @@ def load_bb_config(config): # if config.pretrained_backbone_configs.type == 'base': # restored_model_cfg.model.transformer.dropout_rate = 0.1 - #TODO(kellybuchanan): calculate grid given config restored_model_cfg.patches.grid = [224//16, 224//16] return restored_model_cfg From 74dd7ff4db38ae367839037163a4121243f75f05 Mon Sep 17 00:00:00 2001 From: ekellbuch Date: Mon, 31 Jan 2022 21:46:04 +0000 Subject: [PATCH 043/150] update debug config to include base model for vit-l32 --- .../imagenet21k_segmenter_cityscapes1.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py index 27f6b5a17..39c512898 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py @@ -77,6 +77,20 @@ num_heads = 12 num_layers = 12 hidden_size = 768 +elif DEBUG == 5: + target_size =(128, 128) + STRIDE=32 + batch_size=8 + number_train_examples_debug = 16 + number_eval_examples_debug = 16 + num_training_epochs = 1 # ml_collections.FieldReference(100) + log_eval_steps = 1 + + mlp_dim = 4096 + num_heads = 16 + num_layers = 24 + hidden_size = 1024 + def get_config(): From 868c8c6f23ceff6833db7e15d9007d784bc6194a Mon Sep 17 00:00:00 2001 From: ekellbuch Date: Mon, 31 Jan 2022 22:02:53 +0000 
Subject: [PATCH 044/150] add code to debug a toy model on vm --- experimental/cityscapes/run_pretrained.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/cityscapes/run_pretrained.sh b/experimental/cityscapes/run_pretrained.sh index 6d9fc616a..51ecfeb52 100755 --- a/experimental/cityscapes/run_pretrained.sh +++ b/experimental/cityscapes/run_pretrained.sh @@ -18,7 +18,7 @@ if [ "$(uname)" = "Darwin" ] ; then # --tpu=$tpu elif [ "$(uname)" = "Linux" ]; then echo "in Linux" - config_file='experiments/imagenet21k_segmenter_cityscapes13.py' + config_file='experiments/imagenet21k_segmenter_cityscapes1.py' output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs13" num_cores=8 tpu='local' From f287731ff0c379220edfe3234cb83a3ab1f44d2e Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 1 Feb 2022 16:11:25 -0500 Subject: [PATCH 045/150] include training config for vit-l32 model, directly read positional embedding size from loaded model to automatically rescale upstream positional embeddings to match the dimensions of the downtream possitional embeddings --- experimental/cityscapes/custom_models.py | 21 +++++++++++-------- .../imagenet21k_segmenter_cityscapes1.py | 4 ++-- ...k_segmenter_cityscapes_deterministic_10.py | 11 ++++++---- experimental/cityscapes/pretrainer_utils.py | 9 ++++---- experimental/cityscapes/run_pretrained.sh | 3 +++ 5 files changed, 29 insertions(+), 19 deletions(-) diff --git a/experimental/cityscapes/custom_models.py b/experimental/cityscapes/custom_models.py index 6e6fabe95..70dcc01a0 100644 --- a/experimental/cityscapes/custom_models.py +++ b/experimental/cityscapes/custom_models.py @@ -94,16 +94,16 @@ def init_backbone_from_train_state( self.config.dataset_configs.target_size[1] // self.config.patches.size[1] ] - - # Get grid sizes of restored model: - if 'patches' in restored_model_cfg: - restored_patches_cfg = restored_model_cfg.patches - else: - restored_patches_cfg = 
restored_model_cfg.stem_configs.patches - if 'grid' in restored_patches_cfg: + # Find size of positional embeddings (grid size) if given as input + # otherwise we will take the will use the model checkpoint to estimate thiis + if ('patches' in restored_model_cfg) or ('stem_configs' in restored_model_cfg): + if ('patches' in restored_model_cfg): + restored_patches_cfg = restored_model_cfg.patches + else: + restored_patches_cfg = restored_model_cfg.stem_configs.patches gs_vit = restored_patches_cfg.grid else: - raise NotImplementedError("") + gs_vit = None # init_dset_meta = self.config.model.backbone.init_from.dataset_meta_data # gs_vit = [ @@ -228,7 +228,7 @@ def _replace_dict(model: PyTree, continue logging.info('Loading %s from checkpoint into model', m_key_str) - # fix if token + # resize positional embeddings given token if 'posembed_input' in m_key: # might need resolution change # the backbone should be pose segmenter # vit_posemb = m_params['posembed_input']['pos_embedding'] @@ -246,6 +246,9 @@ def _replace_dict(model: PyTree, vit_posemb = vit_posemb[0] logging.info('Resized variant: %s to %s', vit_posemb.shape, segvit_posemb.shape) + if gs_vit is None: + gs_vit = [int(np.sqrt(vit_posemb.shape[0])), int(np.sqrt(vit_posemb.shape[0]))] + assert np.prod(gs_vit) == vit_posemb.shape[0] assert np.prod(gs_segvit) == segvit_ntok if gs_vit != gs_segvit: # we need resolution change diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py index 39c512898..188d206c3 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py @@ -23,7 +23,7 @@ #import get_fewshot # local file import _CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 1 +DEBUG = 5 STRIDE = 4 target_size=(128, 128) @@ -80,7 +80,7 @@ elif DEBUG == 5: target_size =(128, 128) STRIDE=32 - batch_size=8 + batch_size=1 
number_train_examples_debug = 16 number_eval_examples_debug = 16 num_training_epochs = 1 # ml_collections.FieldReference(100) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py index 37c473bed..8dfe8fb56 100644 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py +++ b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py @@ -72,8 +72,9 @@ def get_config(): config.dataset_configs.train_split = train_split # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + if DEBUG == 5: + config.dataset_configs.number_train_examples_debug = number_train_examples_debug + config.dataset_configs.number_eval_examples_debug = number_train_examples_debug # config following scenic # model @@ -114,8 +115,10 @@ def get_config(): config.focal_loss_gamma = 0.0 # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size + if DEBUG == 5: + steps_per_epoch = number_train_examples_debug // config.batch_size + else: + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
config.lr_configs = ml_collections.ConfigDict() diff --git a/experimental/cityscapes/pretrainer_utils.py b/experimental/cityscapes/pretrainer_utils.py index 9b2958002..0caed0d32 100644 --- a/experimental/cityscapes/pretrainer_utils.py +++ b/experimental/cityscapes/pretrainer_utils.py @@ -31,15 +31,16 @@ def load_bb_config(config): Returns: restored_model_cfg: mock model config """ - #TODO(kellybuchanan): calculate grid given config + #TODO(kellybuchanan): we can read the grid size (pose-embedding) given config file. + # Current approach estimates it from the aata directly, see init_backbone_from_train_state restored_model_cfg = ml_collections.ConfigDict() - restored_model_cfg.patches = ml_collections.ConfigDict() - restored_model_cfg.patches.size = [16, 16] + #restored_model_cfg.patches = ml_collections.ConfigDict() + #restored_model_cfg.patches.size = [16, 16] restored_model_cfg.classifier = 'token' # if config.pretrained_backbone_configs.type == 'base': # restored_model_cfg.model.transformer.dropout_rate = 0.1 - restored_model_cfg.patches.grid = [224//16, 224//16] + #restored_model_cfg.patches.grid = [224//16, 224//16] return restored_model_cfg diff --git a/experimental/cityscapes/run_pretrained.sh b/experimental/cityscapes/run_pretrained.sh index 51ecfeb52..616939cb4 100755 --- a/experimental/cityscapes/run_pretrained.sh +++ b/experimental/cityscapes/run_pretrained.sh @@ -7,6 +7,9 @@ if [ "$(uname)" = "Darwin" ] ; then # Do something under Mac OS X platform config_file='experiments/imagenet21k_segmenter_cityscapes1.py' + config_file='experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_10.py' + #config_file='experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py' + output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs" num_cores=0 tpu=False From d0446375411dd11a30c529e7572ae6430f89aeca Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 1 Feb 2022 16:57:36 -0500 Subject: [PATCH 
046/150] add experiment config for different splits using vit-l32 models --- experimental/cityscapes/README.md | 2 +- ...k_segmenter_cityscapes_deterministic_10.py | 176 ++++++++++++++++++ ..._segmenter_cityscapes_deterministic_100.py | 176 ++++++++++++++++++ ...enet21k_segmenter_cityscapes_scratch_10.py | 176 ++++++++++++++++++ ...net21k_segmenter_cityscapes_scratch_100.py | 176 ++++++++++++++++++ 5 files changed, 705 insertions(+), 1 deletion(-) create mode 100644 experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_10.py create mode 100644 experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_100.py create mode 100644 experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_10.py create mode 100644 experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_100.py diff --git a/experimental/cityscapes/README.md b/experimental/cityscapes/README.md index eb2e1d4ea..a77038797 100644 --- a/experimental/cityscapes/README.md +++ b/experimental/cityscapes/README.md @@ -8,6 +8,6 @@ [x] add pavpu metric.
[x] calculate uncertainty metrics.
-[] Add run with vit l-32 backbone.
+[x] Add run with vit l-32 backbone.
diff --git a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_10.py b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_10.py new file mode 100644 index 000000000..d8106ca10 --- /dev/null +++ b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_10.py @@ -0,0 +1,176 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +TRAIN_PROP=10 + +# we will have 4 version of train split +if TRAIN_PROP == 100: + _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE + train_split = 'train' +elif TRAIN_PROP == 75: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 + train_split = 'train[:75%]' +elif TRAIN_PROP == 50: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 + train_split = 'train[:50%]' +elif TRAIN_PROP == 25: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 + train_split = 'train[:25%]' +elif TRAIN_PROP == 10: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 + train_split = 'train[:10%]' + +target_size = (512, 512) +LOAD_PRETRAINED_BACKBONE = True +PRETRAIN_BACKBONE_TYPE = 'base' + +STRIDE=32 +batch_size=1 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 4096 +num_heads = 16 +num_layers = 24 +hidden_size = 1024 + +if DEBUG ==5: + number_train_examples_debug = 16 + num_training_epochs = 5 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = train_split + + # flags to debug scenic on mac + if DEBUG == 5: + config.dataset_configs.number_train_examples_debug = number_train_examples_debug + config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + 
config.backbone_configs.hidden_size = hidden_size + #config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + if DEBUG == 5: + steps_per_epoch = number_train_examples_debug // config.batch_size + else: + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-L32/1/checkpoint.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/4097549f62ca5e209c6f1ca244fe178b53b6cff4/baselines/jft/experiments/jft300m_vit_l32_finetune.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + raise NotImplementedError("") + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git 
a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_100.py b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_100.py new file mode 100644 index 000000000..8cb0ca135 --- /dev/null +++ b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_100.py @@ -0,0 +1,176 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +TRAIN_PROP=100 + +# we will have 4 version of train split +if TRAIN_PROP == 100: + _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE + train_split = 'train' +elif TRAIN_PROP == 75: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 + train_split = 'train[:75%]' +elif TRAIN_PROP == 50: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 + train_split = 'train[:50%]' +elif TRAIN_PROP == 25: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 + train_split = 'train[:25%]' +elif TRAIN_PROP == 10: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 + train_split = 'train[:10%]' + +target_size = (512, 512) +LOAD_PRETRAINED_BACKBONE = True +PRETRAIN_BACKBONE_TYPE = 'base' + +STRIDE=32 +batch_size=1 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 4096 +num_heads = 16 +num_layers = 24 +hidden_size = 1024 + +if DEBUG ==5: + number_train_examples_debug = 16 + num_training_epochs = 5 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = train_split + + # flags to debug scenic on mac + if DEBUG == 5: + config.dataset_configs.number_train_examples_debug = number_train_examples_debug + config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + 
config.backbone_configs.hidden_size = hidden_size + #config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + if DEBUG == 5: + steps_per_epoch = number_train_examples_debug // config.batch_size + else: + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-L32/1/checkpoint.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/4097549f62ca5e209c6f1ca244fe178b53b6cff4/baselines/jft/experiments/jft300m_vit_l32_finetune.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + raise NotImplementedError("") + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git 
a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_10.py b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_10.py new file mode 100644 index 000000000..297a80f36 --- /dev/null +++ b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_10.py @@ -0,0 +1,176 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +TRAIN_PROP=10 + +# we will have 4 version of train split +if TRAIN_PROP == 100: + _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE + train_split = 'train' +elif TRAIN_PROP == 75: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 + train_split = 'train[:75%]' +elif TRAIN_PROP == 50: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 + train_split = 'train[:50%]' +elif TRAIN_PROP == 25: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 + train_split = 'train[:25%]' +elif TRAIN_PROP == 10: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 + train_split = 'train[:10%]' + +target_size = (512, 512) +LOAD_PRETRAINED_BACKBONE = False +PRETRAIN_BACKBONE_TYPE = 'base' + +STRIDE=32 +batch_size=1 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 4096 +num_heads = 16 +num_layers = 24 +hidden_size = 1024 + +if DEBUG ==5: + number_train_examples_debug = 16 + num_training_epochs = 5 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = train_split + + # flags to debug scenic on mac + if DEBUG == 5: + config.dataset_configs.number_train_examples_debug = number_train_examples_debug + config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + 
config.backbone_configs.hidden_size = hidden_size + #config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + if DEBUG == 5: + steps_per_epoch = number_train_examples_debug // config.batch_size + else: + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-L32/1/checkpoint.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/4097549f62ca5e209c6f1ca244fe178b53b6cff4/baselines/jft/experiments/jft300m_vit_l32_finetune.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + raise NotImplementedError("") + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git 
a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_100.py b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_100.py new file mode 100644 index 000000000..81e47f8f7 --- /dev/null +++ b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_100.py @@ -0,0 +1,176 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +TRAIN_PROP=100 + +# we will have 4 version of train split +if TRAIN_PROP == 100: + _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE + train_split = 'train' +elif TRAIN_PROP == 75: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 + train_split = 'train[:75%]' +elif TRAIN_PROP == 50: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 + train_split = 'train[:50%]' +elif TRAIN_PROP == 25: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 + train_split = 'train[:25%]' +elif TRAIN_PROP == 10: + _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 + train_split = 'train[:10%]' + +target_size = (512, 512) +LOAD_PRETRAINED_BACKBONE = False +PRETRAIN_BACKBONE_TYPE = 'base' + +STRIDE=32 +batch_size=1 +num_training_epochs = 100 # ml_collections.FieldReference(100) +log_eval_steps = 200 + +mlp_dim = 4096 +num_heads = 16 +num_layers = 24 +hidden_size = 1024 + +if DEBUG ==5: + number_train_examples_debug = 16 + num_training_epochs = 5 + +def get_config(): + """Config for cityscapes segmentation.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub_init' + + #dataset + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = train_split + + # flags to debug scenic on mac + if DEBUG == 5: + config.dataset_configs.number_train_examples_debug = number_train_examples_debug + config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + # model + config.model_name = 'segmenter_pretrained_mini' + config.model = ml_collections.ConfigDict() + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.classifier = 'gap' + #config.backbone_configs.grid_size + 
config.backbone_configs.hidden_size = hidden_size + #config.backbone_configs.patches + #config.backbone_configs.representation_size = None + + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + + #decoder + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + if DEBUG == 5: + steps_per_epoch = number_train_examples_debug // config.batch_size + else: + steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size + + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * steps_per_epoch + config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * steps_per_epoch + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 + + # extra + config.args = {} + + return config + + +def get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE + + if PRETRAIN_BACKBONE_TYPE == 'base': + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-L32/1/checkpoint.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/4097549f62ca5e209c6f1ca244fe178b53b6cff4/baselines/jft/experiments/jft300m_vit_l32_finetune.py" + elif PRETRAIN_BACKBONE_TYPE == 'gp': + raise NotImplementedError("") + else: + raise NotImplementedError("") + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) From 
77d1f1e75138e6ed985b1a8ea8c8377f6c461ad7 Mon Sep 17 00:00:00 2001 From: ekellbuch Date: Wed, 2 Feb 2022 17:28:55 +0000 Subject: [PATCH 047/150] add config file with default batch size matches # tpu --- ...k_segmenter_cityscapes_deterministic_10.py | 2 +- ..._segmenter_cityscapes_deterministic_100.py | 2 +- ...enet21k_segmenter_cityscapes_scratch_10.py | 2 +- ...net21k_segmenter_cityscapes_scratch_100.py | 2 +- experimental/cityscapes/run_l32_splits_vm.sh | 34 +++++++++++++++++++ 5 files changed, 38 insertions(+), 4 deletions(-) create mode 100755 experimental/cityscapes/run_l32_splits_vm.sh diff --git a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_10.py b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_10.py index d8106ca10..58e956884 100644 --- a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_10.py +++ b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_10.py @@ -47,7 +47,7 @@ PRETRAIN_BACKBONE_TYPE = 'base' STRIDE=32 -batch_size=1 +batch_size=8 num_training_epochs = 100 # ml_collections.FieldReference(100) log_eval_steps = 200 diff --git a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_100.py b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_100.py index 8cb0ca135..d295068db 100644 --- a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_100.py +++ b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_100.py @@ -47,7 +47,7 @@ PRETRAIN_BACKBONE_TYPE = 'base' STRIDE=32 -batch_size=1 +batch_size=8 num_training_epochs = 100 # ml_collections.FieldReference(100) log_eval_steps = 200 diff --git a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_10.py 
b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_10.py index 297a80f36..4286b736b 100644 --- a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_10.py +++ b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_10.py @@ -47,7 +47,7 @@ PRETRAIN_BACKBONE_TYPE = 'base' STRIDE=32 -batch_size=1 +batch_size=8 num_training_epochs = 100 # ml_collections.FieldReference(100) log_eval_steps = 200 diff --git a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_100.py b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_100.py index 81e47f8f7..4411b4b9b 100644 --- a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_100.py +++ b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_100.py @@ -47,7 +47,7 @@ PRETRAIN_BACKBONE_TYPE = 'base' STRIDE=32 -batch_size=1 +batch_size=8 num_training_epochs = 100 # ml_collections.FieldReference(100) log_eval_steps = 200 diff --git a/experimental/cityscapes/run_l32_splits_vm.sh b/experimental/cityscapes/run_l32_splits_vm.sh new file mode 100755 index 000000000..1bbd7b81b --- /dev/null +++ b/experimental/cityscapes/run_l32_splits_vm.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# train segmenter model on cityscapes using different pretrained backbones for different splits + +function get_config() +{ + local config_file_name="experiments/splits_l32/imagenet21k_segmenter_cityscapes_$1_$2.py" + echo "$config_file_name" +} + +num_cores=8 +tpu='local' +use_gpu=False + + +for rng_seed in 0 +do +for train_mode in "deterministic" #"scratch" +do +for train_split in 10 100 # 75 50 25 +do +config_file=$(get_config $train_mode $train_split) +run_name="${train_mode}_split${train_split}_seed${rng_seed}" +output_dir_ckpt="gs://ub-ekb/segmenter/cityscapes/run_splits_l32/checkpoints/${run_name}" +echo "Running 
experiment ${output_dir_ckpt}" +python3 deterministic.py --output_dir=${output_dir_ckpt} \ + --num_cores=$num_cores \ + --use_gpu=$use_gpu \ + --config=${config_file} \ + --config.rng_seed=${rng_seed} \ + --tpu=$tpu +done +done +done From b309462cc39ef2a2ca0e7232a90d0b1dd3e8c554 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 8 Feb 2022 09:04:50 -0500 Subject: [PATCH 048/150] update default debugging params to be compatible with osx --- experimental/cityscapes/README.md | 2 ++ .../imagenet21k_segmenter_cityscapes1.py | 14 ++++++++------ experimental/cityscapes/run_pretrained.sh | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/experimental/cityscapes/README.md b/experimental/cityscapes/README.md index a77038797..8066e7002 100644 --- a/experimental/cityscapes/README.md +++ b/experimental/cityscapes/README.md @@ -10,4 +10,6 @@ [x] Add run with vit l-32 backbone.
+[] Eval ensemble of vit models.
+ diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py index 188d206c3..fc997ce33 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py @@ -23,16 +23,16 @@ #import get_fewshot # local file import _CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 5 +DEBUG = 1 STRIDE = 4 target_size=(128, 128) - +train_split='train' # debug on mac if DEBUG == 1: batch_size = 1 - number_train_examples_debug = 10 - number_eval_examples_debug = 10 + number_train_examples_debug = 29 + number_eval_examples_debug = 29 num_training_epochs = 1 # ml_collections.FieldReference(100) log_eval_steps = 1 @@ -41,6 +41,7 @@ num_layers = 1 hidden_size = 1 # debug on v3-8: 1 epoch/16 samples/small vit + train_split='train[:1%]' elif DEBUG == 2: batch_size=8 number_train_examples_debug = 16 @@ -102,10 +103,10 @@ def get_config(): config.dataset_name = 'cityscapes' config.dataset_configs = ml_collections.ConfigDict() config.dataset_configs.target_size = target_size - config.dataset_configs.train_split='train' + config.dataset_configs.train_split=train_split # flags to debug scenic on mac config.dataset_configs.number_train_examples_debug = number_train_examples_debug - config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug # config following scenic config.num_classes = 19 @@ -143,6 +144,7 @@ def get_config(): steps_per_epoch = number_train_examples_debug // config.batch_size # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.steps_per_epoch = steps_per_epoch config.lr_configs = ml_collections.ConfigDict() config.lr_configs.learning_rate_schedule = 'compound' config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' diff --git a/experimental/cityscapes/run_pretrained.sh b/experimental/cityscapes/run_pretrained.sh index 616939cb4..cafed9719 100755 --- a/experimental/cityscapes/run_pretrained.sh +++ b/experimental/cityscapes/run_pretrained.sh @@ -7,7 +7,7 @@ if [ "$(uname)" = "Darwin" ] ; then # Do something under Mac OS X platform config_file='experiments/imagenet21k_segmenter_cityscapes1.py' - config_file='experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_10.py' + #config_file='experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_10.py' #config_file='experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py' output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs" From 728e50288d51fdc900c2fb18cd2053abe64b77bc Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 8 Feb 2022 09:06:57 -0500 Subject: [PATCH 049/150] add code to (1) run ensemble (2) read metrics from ensemble --- experimental/cityscapes/ensemble_eval.py | 254 +++++++++++++++++++ experimental/cityscapes/run_ensemble.sh | 59 +++++ experimental/cityscapes/run_ensemble_eval.sh | 61 +++++ 3 files changed, 374 insertions(+) create mode 100644 experimental/cityscapes/ensemble_eval.py create mode 100755 experimental/cityscapes/run_ensemble.sh create mode 100755 experimental/cityscapes/run_ensemble_eval.sh diff --git a/experimental/cityscapes/ensemble_eval.py b/experimental/cityscapes/ensemble_eval.py new file mode 100644 index 000000000..2cac1fa2a --- /dev/null +++ b/experimental/cityscapes/ensemble_eval.py @@ -0,0 +1,254 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Train vit model on cityscapes. + +Evaluate ensemble +""" + +import os +import sys + +# %% +import jax +import flax +import numpy as np +import jax.numpy as jnp +from flax.training import checkpoints + +# %% +import tensorflow as tf +# %% +from absl import app +from absl import flags +from absl import logging +from ml_collections.config_flags import config_flags +from tensorflow.io import gfile + +import custom_models +import custom_segmentation_eval +# scenic dependencies for debugging +from scenic.train_lib import train_utils +from scenic.model_lib.base_models import model_utils +from scenic.model_lib.base_models.segmentation_model import num_pixels + +# import train_utils # local file import + +#%% +config_flags.DEFINE_config_file( + 'config', None, 'Training configuration.', lock_config=True) +flags.DEFINE_string('output_dir', default=None, help='Work unit directory.') +flags.DEFINE_string('checkpoint_dir', default=None, help='Checkpoint directory.') + +flags.DEFINE_integer( + 'num_cores', default=None, help='Unused. How many devices being used.') +flags.DEFINE_boolean( + 'use_gpu', default=None, help='Unused. Whether or not running on GPU.') +flags.DEFINE_string('tpu', None, + 'Unused. Name of the TPU. 
Only used if use_gpu is False.') +flags.DEFINE_string('dataset_service_address', None, + 'Address of the tf.data service') +FLAGS = flags.FLAGS + + +def write_note(note): + if jax.process_index() == 0: + logging.info('NOTE: %s', note) + + +from clu import metric_writers + + + +def parse_checkpoint_dir(checkpoint_dir): + """Parse directory of checkpoints.""" + paths = [] + subdirectories = tf.io.gfile.glob(os.path.join(checkpoint_dir, '*')) + #is_checkpoint = lambda f: ('checkpoint' in f and '.index' in f) + is_checkpoint = lambda f: ('checkpoint' in f) + + for subdir in subdirectories: + for path, _, files in tf.io.gfile.walk(subdir): + if any(f for f in files if is_checkpoint(f)): + latest_checkpoint_without_suffix = flax.training.checkpoints.latest_checkpoint(path) + paths.append(latest_checkpoint_without_suffix) + break + return paths + + +def run(config, workdir): + """Prepares model, and dataset for training. + + This creates summary directories, summary writers, model definition, and + builds datasets to be sent to the main training script. + + Args: + config: ConfigDict; Hyper parameters. + workdir: string; Root directory for the experiment. + + Returns: + The outputs of trainer.train(), which are train_state, train_summary, and + eval_summary. 
+ """ + lead_host = jax.process_index() == 0 + # set up the train_dir and log_dir + gfile.makedirs(workdir) + #workdir = os.path.join(workdir, 'trial') + #gfile.makedirs(workdir) + + summary_writer = None + if lead_host and config.write_summary: + tensorboard_dir = os.path.join(workdir, 'tb_summaries') + gfile.makedirs(tensorboard_dir) + # summary_writer = tensorboard.SummaryWriter(tensorboard_dir) + summary_writer = metric_writers.SummaryWriter(tensorboard_dir) + + device_count = jax.device_count() + logging.info('device_count: %d', device_count) + logging.info('num_hosts : %d', jax.process_count()) + logging.info('host_id : %d', jax.process_index()) + + rng = jax.random.PRNGKey(config.rng_seed) + logging.info('rng: %s', rng) + + # ---------------------- + # Call Model + # ---------------------- + + model_cls = custom_models.SegmenterSegmentationModel + + # ---------------------- + # Load dataset + # ---------------------- + data_rng, rng = jax.random.split(rng) + # set resource limit to debug in mac osx (see https://github.com/tensorflow/datasets/issues/1441) + if jax.process_index() == 0 and sys.platform == 'darwin': + import resource + low, high = resource.getrlimit(resource.RLIMIT_NOFILE) + resource.setrlimit(resource.RLIMIT_NOFILE, (high, high)) + + + eval_dataset_name = config.get('eval_dataset_name', 'cityscapes_val') + + write_note('Loading dataset... 
{}'.format(eval_dataset_name)) + + # TODO: update num_classes + if eval_dataset_name == 'cityscapes_val': + dataset = train_utils.get_dataset( + config, data_rng, dataset_service_address=FLAGS.dataset_service_address) + + + return rng, model_cls, dataset, config, workdir, summary_writer + + +def main(config, output_dir,checkpoint_dir): + + print('config') + print(config) + seed = config.get('rng_seed', 0) + rng = jax.random.PRNGKey(seed) + tf.random.set_seed(seed) + + # ---------------------- + # Get dataset + # ---------------------- + + print('workdir ', output_dir) + rng, model_cls, dataset, config, workdir, summary_writer = run(config, output_dir) + + num_eval_examples = dataset.meta_data['num_eval_examples'] + num_eval_steps = int(np.ceil(num_eval_examples / config.batch_size)) + assert config.batch_size == 1 + + # ---------------------- + # Buils Model + # ---------------------- + + # Build dummy input + input_shape = [1] + list(dataset.meta_data['input_shape'][1:]) + #input_shape = dataset.meta_data['input_shape'] + in_st = dataset.meta_data['input_dtype'] + + dummy_input = jnp.zeros(input_shape, in_st.dtype) + + # Init model + rng, init_rng = jax.random.split(rng) + model = model_cls(config, dataset.meta_data) # extracting number of classes in meta_data + flax_model = model.flax_model + init_model_state, init_params = flax_model.init( + init_rng, dummy_input, train=False, debug=False).pop('params') + + + ensemble_filenames = parse_checkpoint_dir(checkpoint_dir) + ensemble_size = len(ensemble_filenames) + + # ---------------------- + # Evaluate models + # ---------------------- + num_eval_steps = 1 + dataset_name='trial' + # dict_keys(['batch_mask', 'inputs', 'label']) + + # ------------------------------- + # Write Model Predictions to file + # ------------------------------- + + # TODO: reset iterator + test_iterator = dataset.valid_iter + #import pdb; pdb.set_trace() + for m, ensemble_filename in enumerate(ensemble_filenames): + #train_state = 
checkpoints.restore_checkpoint(ensemble_filename, init_model_state) + + variables = {'params': init_params, **init_model_state} + + # assume only one test_set + #test_iterator = iter(test_dataset) + for _ in range(num_eval_steps): # num_eval_steps + eval_batch = next(dataset.valid_iter) #dict_keys(['batch_mask', 'inputs', 'label']) + inputs = eval_batch['inputs'][0] + logits, outs = flax_model.apply(variables, inputs, train=False, mutable=False) + + targets = eval_batch['label'][0] + weights = eval_batch['batch_mask'][0] + one_hot_targets = flax.training.common_utils.onehot(targets, dataset.meta_data['num_classes']) + + correct = model_utils.weighted_correctly_classified(logits, one_hot_targets, weights) + + number_pixels = num_pixels(logits,one_hot_targets,weights) + + accuracy = correct.sum()/number_pixels + + loss = model_utils.weighted_softmax_cross_entropy(logits, one_hot_targets, weights) + + + return + + +if __name__ == '__main__': + # Adds jax flags to the program. + jax.config.config_with_absl() + + # TODO(dusenberrymw): Refactor `main` such that there is a `train_eval` + # function that returns values for tests and does not directly access flags, + # and then have `main` return None. + + def _main(unused_argv): + config = FLAGS.config + output_dir = FLAGS.output_dir + checkpoint_dir = FLAGS.checkpoint_dir + main(config, output_dir, checkpoint_dir) + + app.run(_main) # Ignore the returned values from `main`. 
\ No newline at end of file diff --git a/experimental/cityscapes/run_ensemble.sh b/experimental/cityscapes/run_ensemble.sh new file mode 100755 index 000000000..4fd1b1b19 --- /dev/null +++ b/experimental/cityscapes/run_ensemble.sh @@ -0,0 +1,59 @@ +#!/bin/sh + +# Run deterministic + +base_output_dir="outputs/ensemble" + +declare CITYSCAPES_TRAIN_SIZE=( + ["1"]="29" + ["10"]="298" + ["25"]="744" + ["50"]="1488" + ["75"]="2231" + ["100"]="2975" + ) + +if [ "$(uname)" = "Darwin" ] ; then + # Do something under Mac OS X platform +for split in 1 +do +for model_type in "scratch" +#for model_type in "deterministic" +do +for rng_seed in 0 1 2 +do + config_file="experiments/imagenet21k_segmenter_cityscapes2.py" + output_dir="${base_output_dir}/${model_type}_split${split}_seed${rng_seed}" + num_cores=0 + tpu=False + use_gpu=False + train_split="train[:${split}%]" + num_train_examples=${CITYSCAPES_TRAIN_SIZE[$split]} + python deterministic.py \ + --output_dir=${output_dir} \ + --num_cores=$num_cores \ + --use_gpu=$use_gpu \ + --config=${config_file} \ + --config.rng_seed=${rng_seed} \ + --config.dataset_configs.train_split=${train_split} \ + --config.dataset_configs.number_train_examples_debug=${num_train_examples} \ + #--config.batch_size=8 \ + #--config.upstream_model=${model_type} \ + # + +done +done +done +elif [ "$(uname)" = "Linux" ]; then + echo "in Linux" + config_file='experiments/imagenet21k_segmenter_cityscapes1.py' + output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs13" + num_cores=8 + tpu='local' + use_gpu=False + python3 deterministic.py --output_dir=${output_dir} \ + --num_cores=$num_cores \ + --use_gpu=$use_gpu \ + --config=${config_file} \ + --tpu=$tpu +fi diff --git a/experimental/cityscapes/run_ensemble_eval.sh b/experimental/cityscapes/run_ensemble_eval.sh new file mode 100755 index 000000000..917c87049 --- /dev/null +++ b/experimental/cityscapes/run_ensemble_eval.sh @@ -0,0 +1,61 @@ +#!/bin/sh + +# Run deterministic + 
+base_output_dir="outputs/ensemble" + +declare CITYSCAPES_TRAIN_SIZE=( + ["1"]="29" + ["10"]="298" + ["25"]="744" + ["50"]="1488" + ["75"]="2231" + ["100"]="2975" + ) + +if [ "$(uname)" = "Darwin" ] ; then + # Do something under Mac OS X platform +for split in 1 +do +for model_type in "scratch" +#for model_type in "deterministic" +do +for rng_seed in 0 +do + config_file="experiments/imagenet21k_segmenter_cityscapes2.py" + output_dir="${base_output_dir}/eval" + checkpoint_dir="${base_output_dir}" + num_cores=0 + tpu=False + use_gpu=False + train_split="train[:${split}%]" + num_train_examples=${CITYSCAPES_TRAIN_SIZE[$split]} + python ensemble_eval.py \ + --output_dir=${output_dir} \ + --checkpoint_dir=${checkpoint_dir} \ + --num_cores=$num_cores \ + --use_gpu=$use_gpu \ + --config=${config_file} \ + #--config.rng_seed=${rng_seed} \ + #--config.dataset_configs.train_split=${train_split} \ + #--config.dataset_configs.number_train_examples_debug=${num_train_examples} \ + #--config.batch_size=8 \ + #--config.upstream_model=${model_type} \ + # + +done +done +done +elif [ "$(uname)" = "Linux" ]; then + echo "in Linux" + config_file='experiments/imagenet21k_segmenter_cityscapes1.py' + output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs13" + num_cores=8 + tpu='local' + use_gpu=False + python3 deterministic.py --output_dir=${output_dir} \ + --num_cores=$num_cores \ + --use_gpu=$use_gpu \ + --config=${config_file} \ + --tpu=$tpu +fi From 14fcb468e0d5b197ddd52bf4e4d1ed5ff0ecacba Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 8 Feb 2022 10:02:24 -0500 Subject: [PATCH 050/150] include new experiments to eval changes for learning_rate, # training epochs, pretrained backbone seed and train split --- experimental/cityscapes/deterministic.py | 5 +-- ..._segmenter_cityscapes_deterministic_100.py | 2 +- experimental/cityscapes/run_l32_splits_vm.sh | 41 ++++++++++++++++--- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git 
a/experimental/cityscapes/deterministic.py b/experimental/cityscapes/deterministic.py index 76bd9c1f0..1dbff2e5d 100644 --- a/experimental/cityscapes/deterministic.py +++ b/experimental/cityscapes/deterministic.py @@ -15,10 +15,7 @@ """ Train vit model on cityscapes. - -Step 1: aim to train model on cityscapes for 1 step -# Runs with - +See run_pretrained.sh for an example """ import os diff --git a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_100.py b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_100.py index d295068db..43bfc5bc2 100644 --- a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_100.py +++ b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_100.py @@ -48,7 +48,7 @@ STRIDE=32 batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) +num_training_epochs = ml_collections.FieldReference(100) log_eval_steps = 200 mlp_dim = 4096 diff --git a/experimental/cityscapes/run_l32_splits_vm.sh b/experimental/cityscapes/run_l32_splits_vm.sh index 1bbd7b81b..2ce27bea1 100755 --- a/experimental/cityscapes/run_l32_splits_vm.sh +++ b/experimental/cityscapes/run_l32_splits_vm.sh @@ -1,6 +1,17 @@ #!/bin/bash -# train segmenter model on cityscapes using different pretrained backbones for different splits +: ' +train segmenter model on cityscapes using different pretrained backbones for different splits + +Other parameters: number of training epochs, learning_rate, train_mode. 
+ + +To List checkpoints run: +gsutil ls gs://ub-ekb/segmenter/cityscapes/run_splits_l32 + + + +' function get_config() { @@ -8,27 +19,45 @@ function get_config() echo "$config_file_name" } +function get_pretrained_backbone_path() +{ + local checkpoint_path="gs://ub-checkpoints/ImageNet21k_ViT-L32/$1/checkpoint.npz" + echo "$checkpoint_path" +} num_cores=8 tpu='local' use_gpu=False - -for rng_seed in 0 +for num_training_epochs in 100 #30 50 150 +do +for lr in "0.0001" # "0.03" "0.01" "0.003" "0.001" +do +for rng_seed in 1 2 3 4 do for train_mode in "deterministic" #"scratch" do -for train_split in 10 100 # 75 50 25 +for train_split in 100 # 75 50 25 10 do +learning_rate=$( echo "$lr" | bc ) config_file=$(get_config $train_mode $train_split) -run_name="${train_mode}_split${train_split}_seed${rng_seed}" -output_dir_ckpt="gs://ub-ekb/segmenter/cityscapes/run_splits_l32/checkpoints/${run_name}" +run_name="${train_mode}_split${train_split}_seed${rng_seed}_lr${learning_rate}_step${num_training_epochs}" +output_dir_ckpt="gs://ub-ekb/segmenter/cityscapes/run_splits_vitl32/checkpoints/${run_name}" +pretrained_backbone=$(get_pretrained_backbone_path $rng_seed) +echo "${pretrained_backbone}" echo "Running experiment ${output_dir_ckpt}" +#: ' python3 deterministic.py --output_dir=${output_dir_ckpt} \ --num_cores=$num_cores \ --use_gpu=$use_gpu \ --config=${config_file} \ --config.rng_seed=${rng_seed} \ + --config.lr_configs.base_learning_rate=${learning_rate} \ + --config.num_training_epochs=${num_training_epochs} \ --tpu=$tpu +# --config.pretrained_backbone_configs.checkpoint_path=${pretrained_backbone} \ +#' +done +done done done done From f7fa22603a3f4103cb60048cece42d36a0b4ed4d Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 8 Feb 2022 11:51:57 -0500 Subject: [PATCH 051/150] add code to eval vitl32 models --- experimental/cityscapes/README.md | 4 +- .../cityscapes/deterministic_eval_l32.py | 170 ++++++++++++++++++ .../cityscapes/run_deterministic_eval_l32.sh | 46 
+++++ 3 files changed, 218 insertions(+), 2 deletions(-) create mode 100644 experimental/cityscapes/deterministic_eval_l32.py create mode 100755 experimental/cityscapes/run_deterministic_eval_l32.sh diff --git a/experimental/cityscapes/README.md b/experimental/cityscapes/README.md index a77038797..623c2bbe0 100644 --- a/experimental/cityscapes/README.md +++ b/experimental/cityscapes/README.md @@ -8,6 +8,6 @@ [x] add pavpu metric.
[x] calculate uncertainty metrics.
-[x] Add run with vit l-32 backbone.
- +[x] add run with vit l-32 backbone: run_l32_splits_vm.sh
+[x] add eval for vit l-32 models: run_deterministic_eval_l32.sh
diff --git a/experimental/cityscapes/deterministic_eval_l32.py b/experimental/cityscapes/deterministic_eval_l32.py new file mode 100644 index 000000000..09571dd07 --- /dev/null +++ b/experimental/cityscapes/deterministic_eval_l32.py @@ -0,0 +1,170 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Eval vit model on cityscapes. +""" + +import os +import sys + +# %% +import jax +# %% +import tensorflow as tf +# %% +from absl import app +from absl import flags +from absl import logging +from ml_collections.config_flags import config_flags +from tensorflow.io import gfile + +import custom_models +import custom_segmentation_eval +# scenic dependencies for debugging +from scenic.train_lib import train_utils + +# import train_utils # local file import +import pandas as pd + +#%% +config_flags.DEFINE_config_file( + 'config', None, 'Training configuration.', lock_config=True) +flags.DEFINE_string('output_dir', default=None, help='Work unit directory.') +flags.DEFINE_integer( + 'num_cores', default=None, help='Unused. How many devices being used.') +flags.DEFINE_boolean( + 'use_gpu', default=None, help='Unused. Whether or not running on GPU.') +flags.DEFINE_string('tpu', None, + 'Unused. Name of the TPU. 
Only used if use_gpu is False.') +flags.DEFINE_string('dataset_service_address', None, + 'Address of the tf.data service') +FLAGS = flags.FLAGS + + +def write_note(note): + if jax.process_index() == 0: + logging.info('NOTE: %s', note) + + +from clu import metric_writers + + +def run(config, workdir): + """Prepares model, and dataset for training. + + This creates summary directories, summary writers, model definition, and + builds datasets to be sent to the main training script. + + Args: + config: ConfigDict; Hyper parameters. + workdir: string; Root directory for the experiment. + + Returns: + The outputs of trainer.train(), which are train_state, train_summary, and + eval_summary. + """ + lead_host = jax.process_index() == 0 + # set up the train_dir and log_dir + gfile.makedirs(workdir) + #workdir = os.path.join(workdir, 'trial') + #gfile.makedirs(workdir) + + summary_writer = None + if lead_host and config.write_summary: + tensorboard_dir = os.path.join(workdir, 'tb_summaries') + gfile.makedirs(tensorboard_dir) + # summary_writer = tensorboard.SummaryWriter(tensorboard_dir) + summary_writer = metric_writers.SummaryWriter(tensorboard_dir) + + device_count = jax.device_count() + logging.info('device_count: %d', device_count) + logging.info('num_hosts : %d', jax.process_count()) + logging.info('host_id : %d', jax.process_index()) + + rng = jax.random.PRNGKey(config.rng_seed) + logging.info('rng: %s', rng) + + model_cls = custom_models.SegmenterSegmentationModel + + # ---------------------- + # Load dataset + # ---------------------- + data_rng, rng = jax.random.split(rng) + # set resource limit to debug in mac osx (see https://github.com/tensorflow/datasets/issues/1441) + if jax.process_index() == 0 and sys.platform == 'darwin': + import resource + low, high = resource.getrlimit(resource.RLIMIT_NOFILE) + resource.setrlimit(resource.RLIMIT_NOFILE, (high, high)) + write_note('Loading dataset...') + + # TODO: update num_classes + dataset = train_utils.get_dataset( + 
config, data_rng, dataset_service_address=FLAGS.dataset_service_address) + + return rng, model_cls, dataset, config, workdir, summary_writer + + +def main(config, output_dir): + + print('config') + print(config) + seed = config.get('rng_seed', 0) + rng = jax.random.PRNGKey(seed) + tf.random.set_seed(seed) + + print('workdir ', output_dir) + rng, model_cls, dataset, config, workdir, summary_writer = run(config, output_dir) + print('workdir ', workdir) + + # ---------------------- + # Eval function + # ---------------------- + eval_fn = custom_segmentation_eval.eval1 + + # models + for rng_seed in [0]: #,1,2,3,4]: + for train_mode in ["deterministic"]:#,"scratch","gp"]: + for train_split in [100]: + run_name="{}_split{}_seed{}".format(train_mode, train_split, rng_seed) + tmp_workdir="gs://ub-ekb/segmenter/cityscapes/run_splits_l32/checkpoints/{}".format(run_name) + print("temp directory", tmp_workdir) + tmp_resultsdir="results_l32/metrics/{}.csv".format(run_name) + #import pdb; pdb.set_trace(); + train_state, train_summary, eval_summary = eval_fn(rng=rng, model_cls=model_cls, dataset=dataset, + config=config, + workdir=tmp_workdir, writer=summary_writer) + print(eval_summary) + #import pdb;pdb.set_trace() + df = pd.DataFrame([eval_summary]) + df.to_csv (r'{}'.format(tmp_resultsdir), index = False, header=True) + + return + + +if __name__ == '__main__': + # Adds jax flags to the program. + jax.config.config_with_absl() + + # TODO(dusenberrymw): Refactor `main` such that there is a `train_eval` + # function that returns values for tests and does not directly access flags, + # and then have `main` return None. + + def _main(unused_argv): + config = FLAGS.config + output_dir = FLAGS.output_dir + main(config, output_dir) + + app.run(_main) # Ignore the returned values from `main`. 
diff --git a/experimental/cityscapes/run_deterministic_eval_l32.sh b/experimental/cityscapes/run_deterministic_eval_l32.sh new file mode 100755 index 000000000..c1ff7e531 --- /dev/null +++ b/experimental/cityscapes/run_deterministic_eval_l32.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +# train cityscapes using segmenter with pretrained backbone +# supports 2 options to + +function get_config() +{ + local config_file_name="experiments/splits_l32/imagenet21k_segmenter_cityscapes_$1_$2.py" + echo "$config_file_name" +} + +if [ "$(uname)" = "Darwin" ] ; then + # Do something under Mac OS X platform + config_file='experiments/imagenet21k_segmenter_cityscapes1.py' + output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs" + num_cores=0 + tpu=False + use_gpu=False + python deterministic_eval.py --output_dir=${output_dir} \ + --num_cores=$num_cores \ + --use_gpu=$use_gpu \ + --config=${config_file} \ + # --tpu=$tpu +elif [ "$(uname)" = "Linux" ]; then + echo "in Linux" + train_mode="deterministic" + train_split=100 + rng_seed=0 + config_file=$(get_config $train_mode $train_split) + run_name="${train_mode}_split${train_split}_seed${rng_seed}" + #config_file='experiments/imagenet21k_segmenter_cityscapes13.py' + #output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs13" + output_dir="gs://ub-ekb/segmenter/cityscapes/run_splits_l32/checkpoints/${run_name}" + num_cores=8 + tpu='local' + use_gpu=False + python3 deterministic_eval_l32.py --output_dir=${output_dir} \ + --num_cores=$num_cores \ + --use_gpu=$use_gpu \ + --config=${config_file} \ + --tpu=$tpu +# --config.batch_size=${batch_size} \ + +fi + +#%% From cac843f30ae432ed9043859d79eb9241962ad7fd Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 8 Feb 2022 15:55:39 -0500 Subject: [PATCH 052/150] add code compatible with wandb for hyperparam search --- experimental/cityscapes/deterministic.py | 31 +++++++++++++++++++ experimental/cityscapes/run_ensemble.sh | 38 
++++++++++-------------- 2 files changed, 47 insertions(+), 22 deletions(-) diff --git a/experimental/cityscapes/deterministic.py b/experimental/cityscapes/deterministic.py index 1dbff2e5d..98f0a7a4a 100644 --- a/experimental/cityscapes/deterministic.py +++ b/experimental/cityscapes/deterministic.py @@ -39,6 +39,10 @@ # import train_utils # local file import +import wandb +import pathlib +import datetime + #%% config_flags.DEFINE_config_file( 'config', None, 'Training configuration.', lock_config=True) @@ -125,6 +129,25 @@ def main(config, output_dir): rng = jax.random.PRNGKey(seed) tf.random.set_seed(seed) + # Wandb Setup + if config.use_wandb: + pathlib.Path(config.wandb_dir).mkdir(parents=True, exist_ok=True) + wandb_args = dict( + project=config.wandb_project, + entity='ub_rdl_big_paper', + dir=config.wandb_dir, + reinit=True, + name=config.wandb_exp_name, + group=config.wandb_exp_group) + wandb_run = wandb.init(**wandb_args) + wandb.config.update(FLAGS, allow_val_change=True) + output_dir = str( + os.path.join(output_dir, + datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S'))) + else: + wandb_run = None + #output_dir = FLAGS.output_dir + print('workdir ', output_dir) rng, model_cls, dataset, config, workdir, summary_writer = run(config, output_dir) print('workdir ', workdir) @@ -139,6 +162,14 @@ def main(config, output_dir): workdir=output_dir, writer=summary_writer) print(train_summary) + #import pdb; pdb.set_trace() + if config.use_wandb: + epoch = int(train_state.global_step) + wandb.log(train_summary, step=epoch) + wandb.log(eval_summary, step=epoch) + + if wandb_run is not None: + wandb_run.finish() return diff --git a/experimental/cityscapes/run_ensemble.sh b/experimental/cityscapes/run_ensemble.sh index 4fd1b1b19..9ca67b46d 100755 --- a/experimental/cityscapes/run_ensemble.sh +++ b/experimental/cityscapes/run_ensemble.sh @@ -13,20 +13,27 @@ declare CITYSCAPES_TRAIN_SIZE=( ["100"]="2975" ) +# Debug on Mac OS X platform +use_gpu=False if [ "$(uname)" = 
"Darwin" ] ; then - # Do something under Mac OS X platform -for split in 1 +tpu=False +num_cores=0 +batch_size=1 +elif [ "$(uname)" = "Linux" ]; then +tpu='local' +num_cores=8 +batch_size=8 +fi + +for split in 10 do for model_type in "scratch" #for model_type in "deterministic" do -for rng_seed in 0 1 2 +for rng_seed in 0 # 1 2 do - config_file="experiments/imagenet21k_segmenter_cityscapes2.py" + config_file="experiments/imagenet21k_segmenter_cityscapes3.py" output_dir="${base_output_dir}/${model_type}_split${split}_seed${rng_seed}" - num_cores=0 - tpu=False - use_gpu=False train_split="train[:${split}%]" num_train_examples=${CITYSCAPES_TRAIN_SIZE[$split]} python deterministic.py \ @@ -37,23 +44,10 @@ do --config.rng_seed=${rng_seed} \ --config.dataset_configs.train_split=${train_split} \ --config.dataset_configs.number_train_examples_debug=${num_train_examples} \ - #--config.batch_size=8 \ + --config.batch_size=${batch_size} \ + --tpu=${tpu} \ #--config.upstream_model=${model_type} \ - # done done done -elif [ "$(uname)" = "Linux" ]; then - echo "in Linux" - config_file='experiments/imagenet21k_segmenter_cityscapes1.py' - output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs13" - num_cores=8 - tpu='local' - use_gpu=False - python3 deterministic.py --output_dir=${output_dir} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - --config=${config_file} \ - --tpu=$tpu -fi From e46879f25d5a0851e4f15b311a35618db158a336 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 8 Feb 2022 16:00:39 -0500 Subject: [PATCH 053/150] update readme --- experimental/cityscapes/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/experimental/cityscapes/README.md b/experimental/cityscapes/README.md index 27ee574ce..6930aa9bc 100644 --- a/experimental/cityscapes/README.md +++ b/experimental/cityscapes/README.md @@ -13,7 +13,8 @@ Under development (no tpu compatibility) [x] add run to train ensemble models: run_ensemble.sh
+[x] wandb integration: run_ensemble.sh (limited to final step logging)
+ [] Eval ensemble models: run_ensemble_eval
-[] wandb integration .
From 6733d65318ea162c514163942b19af49ee80ce78 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 8 Feb 2022 16:05:30 -0500 Subject: [PATCH 054/150] add config file compatible with wandb --- .../imagenet21k_segmenter_cityscapes3.py | 129 ++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py new file mode 100644 index 000000000..55a72fe86 --- /dev/null +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py @@ -0,0 +1,129 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. 
+ +include wandb parameters +""" +# pylint: enable=line-too-long + +import ml_collections +#import get_fewshot # local file import + +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 1 +STRIDE = 4 + +target_size=(128, 128) + +batch_size = 1 +number_train_examples_debug = 29 +number_eval_examples_debug = 29 +num_training_epochs = ml_collections.FieldReference(1) + +mlp_dim = 2 +num_heads = 1 +num_layers = 1 +hidden_size = 1 +train_split = 'train[:1%]' + + +def get_config(): + """Config for training a patch-transformer on JFT.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub' + + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = train_split + # flags to debug scenic + config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + config.num_classes = 19 + + config.patches = ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. 
+ config.backbone_configs.classifier = 'gap' + + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + config.backbone_configs.hidden_size = hidden_size + + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.steps_per_epoch = config.dataset_configs.get_ref('number_train_examples_debug') // config.get_ref('batch_size') + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' + #config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') + config.lr_configs.steps_per_cycle = config.get_ref('num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + config.upstream_model = 'scratch' + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = 1 * 
config.get_ref('steps_per_epoch') + + # wabdb + config.use_wandb=True + config.wandb_dir= 'wandb' + config.wandb_project = 'rdl-visual' # Wandb project name. + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_group = None # Give experiment a group name. + + return config + + +def get_sweep(hyper): + return hyper.product([]) From a06a446f53dc3ea0dc3cb769dbd6bb504da1f4d4 Mon Sep 17 00:00:00 2001 From: ekellbuch Date: Tue, 8 Feb 2022 21:09:22 +0000 Subject: [PATCH 055/150] update bash script to run with bash and not sh --- experimental/cityscapes/run_ensemble.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/cityscapes/run_ensemble.sh b/experimental/cityscapes/run_ensemble.sh index 9ca67b46d..157ff2a15 100755 --- a/experimental/cityscapes/run_ensemble.sh +++ b/experimental/cityscapes/run_ensemble.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Run deterministic From f2641ff19edc54e286432d82bdae8847b52fc7ac Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 8 Feb 2022 16:32:02 -0500 Subject: [PATCH 056/150] update code so wandb inherits tensorboard logs --- experimental/cityscapes/README.md | 2 +- experimental/cityscapes/deterministic.py | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/experimental/cityscapes/README.md b/experimental/cityscapes/README.md index 6930aa9bc..726200b7b 100644 --- a/experimental/cityscapes/README.md +++ b/experimental/cityscapes/README.md @@ -13,7 +13,7 @@ Under development (no tpu compatibility) [x] add run to train ensemble models: run_ensemble.sh
-[x] wandb integration: run_ensemble.sh (limited to final step logging)
+[x] wandb integration: run_ensemble.sh
[] Eval ensemble models: run_ensemble_eval
diff --git a/experimental/cityscapes/deterministic.py b/experimental/cityscapes/deterministic.py index 98f0a7a4a..e52a7b074 100644 --- a/experimental/cityscapes/deterministic.py +++ b/experimental/cityscapes/deterministic.py @@ -131,14 +131,16 @@ def main(config, output_dir): # Wandb Setup if config.use_wandb: - pathlib.Path(config.wandb_dir).mkdir(parents=True, exist_ok=True) + #pathlib.Path(config.wandb_dir).mkdir(parents=True, exist_ok=True) + gfile.makedirs(config.wandb_dir) wandb_args = dict( project=config.wandb_project, entity='ub_rdl_big_paper', dir=config.wandb_dir, reinit=True, name=config.wandb_exp_name, - group=config.wandb_exp_group) + group=config.wandb_exp_group, + sync_tensorboard=True) wandb_run = wandb.init(**wandb_args) wandb.config.update(FLAGS, allow_val_change=True) output_dir = str( @@ -162,11 +164,6 @@ def main(config, output_dir): workdir=output_dir, writer=summary_writer) print(train_summary) - #import pdb; pdb.set_trace() - if config.use_wandb: - epoch = int(train_state.global_step) - wandb.log(train_summary, step=epoch) - wandb.log(eval_summary, step=epoch) if wandb_run is not None: wandb_run.finish() From 97aca3362309263c8ffe4665c555b5a8839339b2 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 9 Feb 2022 12:03:27 -0500 Subject: [PATCH 057/150] add early stopping flag --- experimental/cityscapes/README.md | 2 +- .../cityscapes/custom_segmentation_trainer.py | 28 ++++++++++++++++++- .../imagenet21k_segmenter_cityscapes3.py | 1 + 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/experimental/cityscapes/README.md b/experimental/cityscapes/README.md index 726200b7b..096268586 100644 --- a/experimental/cityscapes/README.md +++ b/experimental/cityscapes/README.md @@ -14,7 +14,7 @@ Under development (no tpu compatibility) [x] add run to train ensemble models: run_ensemble.sh
[x] wandb integration: run_ensemble.sh
- +[x] add early stopping flag
[] Eval ensemble models: run_ensemble_eval
diff --git a/experimental/cityscapes/custom_segmentation_trainer.py b/experimental/cityscapes/custom_segmentation_trainer.py index 04d9ba684..b00752754 100644 --- a/experimental/cityscapes/custom_segmentation_trainer.py +++ b/experimental/cityscapes/custom_segmentation_trainer.py @@ -419,6 +419,12 @@ def to_cpu(x): step0_log['gflops'] = gflops writer.write_scalars(1, step0_log) + # Early stopping flags + best_opt_accuracy = -1 + best_epoch = 1 + force_out = 0 + early_stopping_patience = config.get('early_stopping_patience') or 20 + for step in range(start_step + 1, total_steps + 1): with jax.profiler.StepTraceContext('train', sfLtep_num=step): train_batch = next(dataset.train_iter) @@ -459,6 +465,7 @@ def to_cpu(x): extra_training_logs=jax.tree_map(train_utils.unreplicate_and_get, extra_training_logs), writer=writer) + # Reset metric accumulation for next evaluation cycle. train_metrics, extra_training_logs = [], [] @@ -469,8 +476,24 @@ def to_cpu(x): train_state = train_utils.sync_model_state_across_replicas(train_state) eval_summary = evaluate(train_state, step) + # here check value + current_epoch = step % log_eval_steps + val_accuracy = eval_summary['accuracy'] + if val_accuracy >= best_opt_accuracy: + best_epoch = current_epoch + best_opt_accuracy = val_accuracy + # best_opt_repl = jax.device_get(opt_repl) + else: + logging.info( + msg=(f'Current val accuracy {val_accuracy} ' + f'(vs {best_opt_accuracy})')) + if current_epoch - best_epoch >= early_stopping_patience: + logging.info(msg='Early stopping, returning best opt!') + # force checkpoint + force_out = 1 + if ((step % checkpoint_steps == 0 and step > 0) or - (step == total_steps)) and config.checkpoint: + (step == total_steps) or (force_out == 1)) and config.checkpoint: ################### CHECK POINTING ########################## with report_progress.timed('checkpoint'): # Sync model state across replicas. 
@@ -480,6 +503,9 @@ def to_cpu(x): accum_train_time=chrono.accum_train_time) train_utils.save_checkpoint(workdir, train_state) + if force_out == 1: + break + chrono.resume() # Un-pause now. # Wait until computations are done before exiting. diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py index 55a72fe86..0d5c7ace1 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py @@ -122,6 +122,7 @@ def get_config(): config.wandb_exp_name = None # Give experiment a name. config.wandb_exp_group = None # Give experiment a group name. + config.early_stopping_patience = 3 # number of epochs to wait before stopping training return config From 6add97855280a10f5ae3bd5bc2cccf6a4544598d Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 9 Feb 2022 12:35:21 -0500 Subject: [PATCH 058/150] add deterministic --- ...t21k_segmenter_cityscapes_deterministic.py | 160 ++++++++++++++++++ experimental/cityscapes/run_ensemble.sh | 38 ++++- experimental/cityscapes/run_ensemble_debug.sh | 53 ++++++ 3 files changed, 243 insertions(+), 8 deletions(-) create mode 100644 experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic.py create mode 100755 experimental/cityscapes/run_ensemble_debug.sh diff --git a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic.py b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic.py new file mode 100644 index 000000000..167c77502 --- /dev/null +++ b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic.py @@ -0,0 +1,160 @@ +# coding=utf-8 +# Copyright 2021 The Uncertainty Baselines Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Segmenter + cityscapes. + +include wandb parameters +""" +# pylint: enable=line-too-long + +import ml_collections +#import get_fewshot # local file import + +_CITYSCAPES_TRAIN_SIZE = 2975 +DEBUG = 0 + +STRIDE = 32 +target_size=(512, 512) + +batch_size = 8 +number_train_examples_debug = 2975 +num_training_epochs = ml_collections.FieldReference(100) + +mlp_dim = 4096 +num_heads = 16 +num_layers = 24 +hidden_size = 1024 +train_split = 'train[:1%]' + +LOAD_PRETRAINED_BACKBONE=True + +if DEBUG ==1: + STRIDE = 4 + target_size = (128, 128) + + batch_size = 1 + number_train_examples_debug = 29 + num_training_epochs = ml_collections.FieldReference(1) + + mlp_dim = 2 + num_heads = 1 + num_layers = 1 + hidden_size = 1 + train_split = 'train[:1%]' + + +def get_config(): + """Config for training a patch-transformer on JFT.""" + config = ml_collections.ConfigDict() + + config.experiment_name = 'cityscapes_segvit_ub' + + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = train_split + # flags to debug scenic + config.dataset_configs.number_train_examples_debug = number_train_examples_debug + #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug + + # config following scenic + config.num_classes = 19 + + config.patches = 
ml_collections.ConfigDict() + config.patches.size = (STRIDE, STRIDE) + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = 'vit' + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. + config.backbone_configs.classifier = 'gap' + + config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + config.backbone_configs.hidden_size = hidden_size + + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = 'linear' + + # training + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = num_training_epochs + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # learning rate + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.steps_per_epoch = config.dataset_configs.get_ref('number_train_examples_debug') // config.get_ref('batch_size') + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') + config.lr_configs.steps_per_cycle = config.get_ref('num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.upstream_model = 'deterministic' + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = get_pretrained_backbone_config(config) + + #logging + config.write_summary = True # write TB and/or XM summary + config.write_xm_measurements = True # write XM measurements + #config.xprof = False # Profile using xprof + config.checkpoint = True # do checkpointing + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = True # debug mode during training + config.debug_eval = True # debug mode during eval + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # wabdb + config.use_wandb=True + config.wandb_dir= 'wandb' + config.wandb_project = 'rdl-visual' # Wandb project name. + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_group = None # Give experiment a group name. 
+ + config.early_stopping_patience = 3 # number of epochs to wait before stopping training + return config + + +def get_pretrained_backbone_config(config): + if not config.load_pretrained_backbone: + return None + pretrained_backbone_configs = ml_collections.ConfigDict() + pretrained_backbone_configs.checkpoint_format = "ub" + pretrained_backbone_configs.type = 'base' + + pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-L32/1/checkpoint.npz" + pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/4097549f62ca5e209c6f1ca244fe178b53b6cff4/baselines/jft/experiments/jft300m_vit_l32_finetune.py" + + return pretrained_backbone_configs + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/cityscapes/run_ensemble.sh b/experimental/cityscapes/run_ensemble.sh index 157ff2a15..fc4045759 100755 --- a/experimental/cityscapes/run_ensemble.sh +++ b/experimental/cityscapes/run_ensemble.sh @@ -1,8 +1,21 @@ #!/bin/bash -# Run deterministic +# train segmenter model on cityscapes using different pretrained backbones for different splits -base_output_dir="outputs/ensemble" +function get_config() +{ + local config_file_name="experiments/splits_l32/imagenet21k_segmenter_cityscapes_$1.py" + echo "$config_file_name" +} + +function get_pretrained_backbone_path() +{ + local checkpoint_path="gs://ub-checkpoints/ImageNet21k_ViT-L32/$1/checkpoint.npz" + echo "$checkpoint_path" +} + +#base_output_dir="outputs/ensemble" +base_output_dir="gs://ub-ekb/segmenter/cityscapes/run_splits_vitl32/checkpoints" declare CITYSCAPES_TRAIN_SIZE=( ["1"]="29" @@ -24,16 +37,20 @@ tpu='local' num_cores=8 batch_size=8 fi - -for split in 10 +for num_training_epochs in 50 #30 50 150 do -for model_type in "scratch" -#for model_type in "deterministic" +for lr in "0.0001" # "0.03" "0.01" "0.003" "0.001" do for rng_seed in 0 # 1 2 do - config_file="experiments/imagenet21k_segmenter_cityscapes3.py" - 
output_dir="${base_output_dir}/${model_type}_split${split}_seed${rng_seed}" +for model_type in "deterministic" +do +for split in 100 +do + config_file=$(get_config $model_type) + learning_rate=$( echo "$lr" | bc ) + run_name="${model_type}_split${split}_seed${rng_seed}_lr${learning_rate}_step${num_training_epochs}" + output_dir="${base_output_dir}/${run_name}" train_split="train[:${split}%]" num_train_examples=${CITYSCAPES_TRAIN_SIZE[$split]} python deterministic.py \ @@ -46,8 +63,13 @@ do --config.dataset_configs.number_train_examples_debug=${num_train_examples} \ --config.batch_size=${batch_size} \ --tpu=${tpu} \ + --config.lr_configs.base_learning_rate=${learning_rate} \ + --config.num_training_epochs=${num_training_epochs} \ #--config.upstream_model=${model_type} \ + # --config.pretrained_backbone_configs.checkpoint_path=${pretrained_backbone} \ done done done +done +done \ No newline at end of file diff --git a/experimental/cityscapes/run_ensemble_debug.sh b/experimental/cityscapes/run_ensemble_debug.sh new file mode 100755 index 000000000..157ff2a15 --- /dev/null +++ b/experimental/cityscapes/run_ensemble_debug.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +# Run deterministic + +base_output_dir="outputs/ensemble" + +declare CITYSCAPES_TRAIN_SIZE=( + ["1"]="29" + ["10"]="298" + ["25"]="744" + ["50"]="1488" + ["75"]="2231" + ["100"]="2975" + ) + +# Debug on Mac OS X platform +use_gpu=False +if [ "$(uname)" = "Darwin" ] ; then +tpu=False +num_cores=0 +batch_size=1 +elif [ "$(uname)" = "Linux" ]; then +tpu='local' +num_cores=8 +batch_size=8 +fi + +for split in 10 +do +for model_type in "scratch" +#for model_type in "deterministic" +do +for rng_seed in 0 # 1 2 +do + config_file="experiments/imagenet21k_segmenter_cityscapes3.py" + output_dir="${base_output_dir}/${model_type}_split${split}_seed${rng_seed}" + train_split="train[:${split}%]" + num_train_examples=${CITYSCAPES_TRAIN_SIZE[$split]} + python deterministic.py \ + --output_dir=${output_dir} \ + --num_cores=$num_cores 
\ + --use_gpu=$use_gpu \ + --config=${config_file} \ + --config.rng_seed=${rng_seed} \ + --config.dataset_configs.train_split=${train_split} \ + --config.dataset_configs.number_train_examples_debug=${num_train_examples} \ + --config.batch_size=${batch_size} \ + --tpu=${tpu} \ + #--config.upstream_model=${model_type} \ + +done +done +done From a9240f1e3bff4cd0e7587f16e30568819d842de9 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 9 Feb 2022 20:13:32 -0500 Subject: [PATCH 059/150] fix bug where checkpoints were not stored in gs when wandb was used: by turning off profiler --- experimental/cityscapes/README.md | 9 ++++ .../cityscapes/custom_segmentation_trainer.py | 6 ++- .../imagenet21k_segmenter_cityscapes3.py | 2 +- ...t21k_segmenter_cityscapes_deterministic.py | 2 +- experimental/cityscapes/run_ensemble.sh | 2 +- experimental/cityscapes/run_ensemble_debug.sh | 49 +++++++++++++++---- 6 files changed, 57 insertions(+), 13 deletions(-) diff --git a/experimental/cityscapes/README.md b/experimental/cityscapes/README.md index 096268586..ff26f4f26 100644 --- a/experimental/cityscapes/README.md +++ b/experimental/cityscapes/README.md @@ -18,3 +18,12 @@ Under development (no tpu compatibility) [] Eval ensemble models: run_ensemble_eval
+Code to run +[] Vanilla deterministic upstream + deterministic downstream.
+[] Ensemble (ensemble upstream + ensemble downstream).
+[] Ensemble (ensemble upstream + deterministic downstream).
+[] Ensemble (BatchEnsemble upstream + deterministic downstream).
+ + +Debug code +[] Run ensemble models: diff --git a/experimental/cityscapes/custom_segmentation_trainer.py b/experimental/cityscapes/custom_segmentation_trainer.py index b00752754..ad6728b4d 100644 --- a/experimental/cityscapes/custom_segmentation_trainer.py +++ b/experimental/cityscapes/custom_segmentation_trainer.py @@ -422,6 +422,7 @@ def to_cpu(x): # Early stopping flags best_opt_accuracy = -1 best_epoch = 1 + current_epoch=1 force_out = 0 early_stopping_patience = config.get('early_stopping_patience') or 20 @@ -444,6 +445,7 @@ def to_cpu(x): for h in hooks: h(step) chrono.pause() # Below are once-in-a-while ops -> pause. + if step % log_summary_steps == 0 or (step == total_steps): ############### LOG TRAIN SUMMARY ############### if lead_host: @@ -477,7 +479,7 @@ def to_cpu(x): eval_summary = evaluate(train_state, step) # here check value - current_epoch = step % log_eval_steps + #current_epoch = int(step // log_eval_steps) val_accuracy = eval_summary['accuracy'] if val_accuracy >= best_opt_accuracy: best_epoch = current_epoch @@ -491,6 +493,7 @@ def to_cpu(x): logging.info(msg='Early stopping, returning best opt!') # force checkpoint force_out = 1 + current_epoch+=1 if ((step % checkpoint_steps == 0 and step > 0) or (step == total_steps) or (force_out == 1)) and config.checkpoint: @@ -504,6 +507,7 @@ def to_cpu(x): train_utils.save_checkpoint(workdir, train_state) if force_out == 1: + # flag turned on due to early stopping break chrono.resume() # Un-pause now. 
diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py index 0d5c7ace1..68ede24eb 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py @@ -107,7 +107,7 @@ def get_config(): #logging config.write_summary = True # write TB and/or XM summary config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof + config.xprof = False # Profile using xprof config.checkpoint = True # do checkpointing config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') diff --git a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic.py b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic.py index 167c77502..ce69c730b 100644 --- a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic.py +++ b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic.py @@ -124,7 +124,7 @@ def get_config(): #logging config.write_summary = True # write TB and/or XM summary config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof + config.xprof = False # Profile using xprof config.checkpoint = True # do checkpointing config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') diff --git a/experimental/cityscapes/run_ensemble.sh b/experimental/cityscapes/run_ensemble.sh index fc4045759..095b59b4a 100755 --- a/experimental/cityscapes/run_ensemble.sh +++ b/experimental/cityscapes/run_ensemble.sh @@ -41,7 +41,7 @@ for num_training_epochs in 50 #30 50 150 do for lr in "0.0001" # "0.03" "0.01" "0.003" "0.001" do -for rng_seed in 0 # 1 2 +for rng_seed in 0 1 2 do for model_type in "deterministic" do diff --git 
a/experimental/cityscapes/run_ensemble_debug.sh b/experimental/cityscapes/run_ensemble_debug.sh index 157ff2a15..4ded8bcfe 100755 --- a/experimental/cityscapes/run_ensemble_debug.sh +++ b/experimental/cityscapes/run_ensemble_debug.sh @@ -1,8 +1,30 @@ #!/bin/bash -# Run deterministic +# debug for run_ensemble +# train segmenter model on cityscapes using different pretrained backbones for different splits -base_output_dir="outputs/ensemble" +# debug for model +DEBUG=1 + + +function get_config() +{ + #local config_file_name="experiments/splits_l32/imagenet21k_segmenter_cityscapes_$1.py" + local config_file_name="experiments/imagenet21k_segmenter_cityscapes3.py" + + echo "$config_file_name" +} + +function get_pretrained_backbone_path() +{ + local checkpoint_path="gs://ub-checkpoints/ImageNet21k_ViT-L32/$1/checkpoint.npz" + echo "$checkpoint_path" +} + +# base_output_dir="outputs/ensemble" + +#base_output_dir="gs://ub-ekb/segmenter/cityscapes/run_splits_vitl32/checkpoints" +base_output_dir='gs://ub-ekb/segmenter/cityscapes/run_splits_debug/checkpoints' declare CITYSCAPES_TRAIN_SIZE=( ["1"]="29" @@ -24,16 +46,20 @@ tpu='local' num_cores=8 batch_size=8 fi - -for split in 10 +for num_training_epochs in 5 #30 50 150 +do +for lr in "0.0001" # "0.03" "0.01" "0.003" "0.001" do -for model_type in "scratch" -#for model_type in "deterministic" +for rng_seed in 1 do -for rng_seed in 0 # 1 2 +for model_type in "deterministic" do - config_file="experiments/imagenet21k_segmenter_cityscapes3.py" - output_dir="${base_output_dir}/${model_type}_split${split}_seed${rng_seed}" +for split in 1 +do + config_file=$(get_config $model_type) + learning_rate=$( echo "$lr" | bc ) + run_name="${model_type}_split${split}_seed${rng_seed}_lr${learning_rate}_step${num_training_epochs}" + output_dir="${base_output_dir}/${run_name}" train_split="train[:${split}%]" num_train_examples=${CITYSCAPES_TRAIN_SIZE[$split]} python deterministic.py \ @@ -46,8 +72,13 @@ do 
--config.dataset_configs.number_train_examples_debug=${num_train_examples} \ --config.batch_size=${batch_size} \ --tpu=${tpu} \ + --config.lr_configs.base_learning_rate=${learning_rate} \ + --config.num_training_epochs=${num_training_epochs} \ #--config.upstream_model=${model_type} \ + # --config.pretrained_backbone_configs.checkpoint_path=${pretrained_backbone} \ done done done +done +done \ No newline at end of file From 62d27fb9163d1b93d355f009b6511eb6c42b05d2 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Thu, 10 Feb 2022 16:12:49 -0500 Subject: [PATCH 060/150] integrate code for hyperparameter sweep in wandb --- experimental/cityscapes/README.md | 17 +- experimental/cityscapes/deterministic.py | 5 +- .../imagenet21k_segmenter_cityscapes3.py | 13 +- ...k_segmenter_cityscapes_deterministic_10.py | 176 ------------------ ..._segmenter_cityscapes_deterministic_100.py | 176 ------------------ ...enet21k_segmenter_cityscapes_scratch_10.py | 176 ------------------ ...net21k_segmenter_cityscapes_scratch_100.py | 176 ------------------ ...t21k_segmenter_cityscapes_deterministic.py | 10 +- ...1k_segmenter_cityscapes_deterministic.yaml | 43 +++++ 9 files changed, 77 insertions(+), 715 deletions(-) delete mode 100644 experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_10.py delete mode 100644 experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_100.py delete mode 100644 experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_10.py delete mode 100644 experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_100.py rename experimental/cityscapes/experiments/{splits_l32 => sweep_vit32}/imagenet21k_segmenter_cityscapes_deterministic.py (94%) create mode 100644 experimental/cityscapes/experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.yaml diff --git a/experimental/cityscapes/README.md
b/experimental/cityscapes/README.md index ff26f4f26..9bda0f550 100644 --- a/experimental/cityscapes/README.md +++ b/experimental/cityscapes/README.md @@ -13,17 +13,26 @@ Under development (no tpu compatibility) [x] add run to train ensemble models: run_ensemble.sh
-[x] wandb integration: run_ensemble.sh
[x] add early stopping flag
[] Eval ensemble models: run_ensemble_eval
+[Wandb integration ](https://docs.wandb.ai/guides/sweeps/configuration)
+[x] Visualize results in wandb: run_ensemble.sh
+[x] Hyperparameter sweep: experiments/toy/toy_sweep
-Code to run +``` +wandb sweep experiments/toy/toy_sweep.yaml +wandb agent ${SWEEPID} +``` + +Code to run:
[] Vanilla deterministic upstream + deterministic downstream.
+``` +wandb sweep experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.yaml +wandb agent ${SWEEPID} +``` [] Ensemble (ensemble upstream + ensemble downstream).
[] Ensemble (ensemble upstream + deterministic downstream).
[] Ensemble (BatchEnsemble upstream + deterministic downstream).
-Debug code -[] Run ensemble models: diff --git a/experimental/cityscapes/deterministic.py b/experimental/cityscapes/deterministic.py index e52a7b074..754a88656 100644 --- a/experimental/cityscapes/deterministic.py +++ b/experimental/cityscapes/deterministic.py @@ -135,7 +135,7 @@ def main(config, output_dir): gfile.makedirs(config.wandb_dir) wandb_args = dict( project=config.wandb_project, - entity='ub_rdl_big_paper', + entity=config.get('wandb_entity', 'ub_rdl_big_paper'), dir=config.wandb_dir, reinit=True, name=config.wandb_exp_name, @@ -145,7 +145,8 @@ def main(config, output_dir): wandb.config.update(FLAGS, allow_val_change=True) output_dir = str( os.path.join(output_dir, - datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S'))) + config.wandb_exp_name + )) else: wandb_run = None #output_dir = FLAGS.output_dir diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py index 68ede24eb..7ce7eb2d1 100644 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py +++ b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py @@ -21,6 +21,9 @@ # pylint: enable=line-too-long import ml_collections +import os.path +import datetime + #import get_fewshot # local file import _CITYSCAPES_TRAIN_SIZE = 2975 @@ -116,10 +119,14 @@ def get_config(): config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') # wabdb - config.use_wandb=True - config.wandb_dir= 'wandb' + config.use_wandb =True + config.wandb_dir = 'wandb' config.wandb_project = 'rdl-visual' # Wandb project name. - config.wandb_exp_name = None # Give experiment a name. + config.wandb_entity = 'ub_rdl_big_paper' + #config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) config.wandb_exp_group = None # Give experiment a group name. 
config.early_stopping_patience = 3 # number of epochs to wait before stopping training diff --git a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_10.py b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_10.py deleted file mode 100644 index 58e956884..000000000 --- a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_10.py +++ /dev/null @@ -1,176 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=10 - -# we will have 4 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = True -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=32 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 4096 -num_heads = 16 -num_layers = 24 -hidden_size = 1024 - -if DEBUG ==5: - number_train_examples_debug = 16 - num_training_epochs = 5 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - if DEBUG == 5: - config.dataset_configs.number_train_examples_debug = number_train_examples_debug - config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - 
config.backbone_configs.hidden_size = hidden_size - #config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - if DEBUG == 5: - steps_per_epoch = number_train_examples_debug // config.batch_size - else: - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-L32/1/checkpoint.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/4097549f62ca5e209c6f1ca244fe178b53b6cff4/baselines/jft/experiments/jft300m_vit_l32_finetune.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - raise NotImplementedError("") - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git 
a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_100.py b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_100.py deleted file mode 100644 index 43bfc5bc2..000000000 --- a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_100.py +++ /dev/null @@ -1,176 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=100 - -# we will have 4 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = True -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=32 -batch_size=8 -num_training_epochs = ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 4096 -num_heads = 16 -num_layers = 24 -hidden_size = 1024 - -if DEBUG ==5: - number_train_examples_debug = 16 - num_training_epochs = 5 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - if DEBUG == 5: - config.dataset_configs.number_train_examples_debug = number_train_examples_debug - config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - 
config.backbone_configs.hidden_size = hidden_size - #config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - if DEBUG == 5: - steps_per_epoch = number_train_examples_debug // config.batch_size - else: - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-L32/1/checkpoint.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/4097549f62ca5e209c6f1ca244fe178b53b6cff4/baselines/jft/experiments/jft300m_vit_l32_finetune.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - raise NotImplementedError("") - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git 
a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_10.py b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_10.py deleted file mode 100644 index 4286b736b..000000000 --- a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_10.py +++ /dev/null @@ -1,176 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=10 - -# we will have 4 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = False -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=32 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 4096 -num_heads = 16 -num_layers = 24 -hidden_size = 1024 - -if DEBUG ==5: - number_train_examples_debug = 16 - num_training_epochs = 5 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - if DEBUG == 5: - config.dataset_configs.number_train_examples_debug = number_train_examples_debug - config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - 
config.backbone_configs.hidden_size = hidden_size - #config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - if DEBUG == 5: - steps_per_epoch = number_train_examples_debug // config.batch_size - else: - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-L32/1/checkpoint.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/4097549f62ca5e209c6f1ca244fe178b53b6cff4/baselines/jft/experiments/jft300m_vit_l32_finetune.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - raise NotImplementedError("") - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git 
a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_100.py b/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_100.py deleted file mode 100644 index 4411b4b9b..000000000 --- a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_scratch_100.py +++ /dev/null @@ -1,176 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=100 - -# we will have 4 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = False -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=32 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 4096 -num_heads = 16 -num_layers = 24 -hidden_size = 1024 - -if DEBUG ==5: - number_train_examples_debug = 16 - num_training_epochs = 5 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - if DEBUG == 5: - config.dataset_configs.number_train_examples_debug = number_train_examples_debug - config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - 
config.backbone_configs.hidden_size = hidden_size - #config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - if DEBUG == 5: - steps_per_epoch = number_train_examples_debug // config.batch_size - else: - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-L32/1/checkpoint.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/4097549f62ca5e209c6f1ca244fe178b53b6cff4/baselines/jft/experiments/jft300m_vit_l32_finetune.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - raise NotImplementedError("") - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git 
a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic.py b/experimental/cityscapes/experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.py similarity index 94% rename from experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic.py rename to experimental/cityscapes/experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.py index ce69c730b..e1bf272f6 100644 --- a/experimental/cityscapes/experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic.py +++ b/experimental/cityscapes/experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.py @@ -20,6 +20,8 @@ """ # pylint: enable=line-too-long +import os +import datetime import ml_collections #import get_fewshot # local file import @@ -30,14 +32,14 @@ target_size=(512, 512) batch_size = 8 -number_train_examples_debug = 2975 +number_train_examples_debug = _CITYSCAPES_TRAIN_SIZE num_training_epochs = ml_collections.FieldReference(100) mlp_dim = 4096 num_heads = 16 num_layers = 24 hidden_size = 1024 -train_split = 'train[:1%]' +train_split = 'train' LOAD_PRETRAINED_BACKBONE=True @@ -136,7 +138,11 @@ def get_config(): config.use_wandb=True config.wandb_dir= 'wandb' config.wandb_project = 'rdl-visual' # Wandb project name. + config.wandb_entity = 'ub_rdl_big_paper' config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) config.wandb_exp_group = None # Give experiment a group name. 
config.early_stopping_patience = 3 # number of epochs to wait before stopping training diff --git a/experimental/cityscapes/experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.yaml b/experimental/cityscapes/experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.yaml new file mode 100644 index 000000000..f9e3d353a --- /dev/null +++ b/experimental/cityscapes/experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.yaml @@ -0,0 +1,43 @@ +name: vit-imagenet21k_segmenter_deterministic +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.rng_seed: + values: [0, 1, 2, 3] + config.dataset_configs.train_split: + value: train + config.dataset_configs.number_train_examples_debug: + value: 2975 + config.lr_configs.base_learning_rate: + values: [0.0001, 0.003, 0.01, 0.001] + config.batch_size: + value: 8 + config.num_training_epochs: + values: [50, 100, 30, 75] + + + + +command: + - ${env} + - python + - ${program} + - "--config" + - "experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.py" + - "--output_dir" + - "gs://ub-ekb/segmenter/cityscapes/run_vitl32_split100/checkpoints" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file From 919712417d42f6aae616801183d7c08ec7999635 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Fri, 11 Feb 2022 11:51:59 -0500 Subject: [PATCH 061/150] add vit_batchensemble test to debug model loading --- experimental/cityscapes/README.md | 64 ++++++++++++--- experimental/cityscapes/run_ensemble_debug.sh | 1 + .../models/vit_batchensemble_test.py | 78 +++++++++++++++++++ 3 files changed, 132 insertions(+), 11 deletions(-) diff --git a/experimental/cityscapes/README.md b/experimental/cityscapes/README.md index 9bda0f550..decda40a5 100644 --- 
a/experimental/cityscapes/README.md +++ b/experimental/cityscapes/README.md @@ -11,12 +11,7 @@ [x] add run with vit l-32 backbone: run_l32_splits_vm.sh
[x] add eval for vit l-32 models: run_deterministic_eval_l32.sh
-Under development (no tpu compatibility) -[x] add run to train ensemble models: run_ensemble.sh
-[x] add early stopping flag
-[] Eval ensemble models: run_ensemble_eval
- -[Wandb integration ](https://docs.wandb.ai/guides/sweeps/configuration)
+## [Wandb integration ](https://docs.wandb.ai/guides/sweeps/configuration)
[x] Visualize results in wandb: run_ensemble.sh
[x] Hyperparameter sweep: experiments/toy/toy_sweep
@@ -25,14 +20,61 @@ wandb sweep experiments/toy/toy_sweep.yaml wandb agent ${SWEEPID} ``` -Code to run:
-[] Vanilla deterministic upstream + deterministic downstream.
+## Experiments + +Fully implemented:
+ +[x] Vanilla deterministic upstream + deterministic downstream.
+Given a deterministic model trained on imagenet21k, +replace the decoder by a segmentation decoder and finetune the model on cityscapes. ``` wandb sweep experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.yaml -wandb agent ${SWEEPID} +wandb agent +``` +Once the models have trained independently, we can evaluate the results by running:
+``` +./run_deterministic_eval_l32.sh +``` + +Missing wandb configuration:
+ +[x] Ensemble (ensemble upstream + ensemble downstream).
+Given E deterministic models trained on imagenet21k, +replace the E decoders in each model by E new segmentation decoders +(this step is achieved by calling get_pretrained_backbone_path). +Finetune each model separately on cityscapes. +Then, aggregate the results. + +``` +./run_ensemble2.sh +``` +Once the models have trained independently, we can aggregate the results by running:
+``` +./run_ensemble_eval.sh ``` -[] Ensemble (ensemble upstream + ensemble downstream).
[] Ensemble (ensemble upstream + deterministic downstream).
-[] Ensemble (BatchEnsemble upstream + deterministic downstream).
+Given E deterministic models trained on imagenet21k, +replace the E decoders in each model by 1 new segmentation decoder. +Finetune the new model on cityscapes. +``` + +``` +[] Batch Ensemble (batch ensemble upstream + deterministic downstream).
+Given a BE deterministic model trained on imagenet21k, +replace the MLP blocks in the encoder by +replace the E decoders in each model by rank-1 decoder which to get outputs [N, E, K]. +Finetune the new model on cityscapes. +Average over E to get the results. +``` + +``` + +To compare parameter between vit and vit_be model run: +``` +python -m unittest -v uncertainty_baselines/models/vit_batchensemble_test.py + +``` + + diff --git a/experimental/cityscapes/run_ensemble_debug.sh b/experimental/cityscapes/run_ensemble_debug.sh index 4ded8bcfe..96b90943f 100755 --- a/experimental/cityscapes/run_ensemble_debug.sh +++ b/experimental/cityscapes/run_ensemble_debug.sh @@ -62,6 +62,7 @@ do output_dir="${base_output_dir}/${run_name}" train_split="train[:${split}%]" num_train_examples=${CITYSCAPES_TRAIN_SIZE[$split]} + pretrained_backbone=$(get_pretrained_backbone_path $rng_seed) python deterministic.py \ --output_dir=${output_dir} \ --num_cores=$num_cores \ diff --git a/uncertainty_baselines/models/vit_batchensemble_test.py b/uncertainty_baselines/models/vit_batchensemble_test.py index 61cbd5ad1..7e9ccba6f 100644 --- a/uncertainty_baselines/models/vit_batchensemble_test.py +++ b/uncertainty_baselines/models/vit_batchensemble_test.py @@ -24,6 +24,84 @@ import ml_collections import uncertainty_baselines as ub +import flax + + + +class VitTest(parameterized.TestCase): + + @parameterized.parameters( + ('gap', 3, 5987), + ) + def test_vision_transformer(self, classifier, representation_size, + expected_param_count): + # TODO(dusenberrymw): Clean this up once config dict is cleaned up in + # VisionTransformer. + def getList(parent, dict): + for key, value in dict.items(): + var_name = '{}/{}'.format(parent, key) + if isinstance(value, jax.numpy.ndarray): + print('{}, {}'.format(var_name, value.shape)) + else: + getList(var_name, value) + return dict.keys() + + DEBUG=1 #also visualize params for vit model + config = ml_collections.ConfigDict() + # Model parameters. 
+ config.model = ml_collections.ConfigDict() + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = [16, 16] + config.model.hidden_size = 768 + config.model.representation_size = 768 + config.model.classifier = 'token' + config.model.transformer = ml_collections.ConfigDict() + config.model.transformer.num_layers = 12 + config.model.transformer.dropout_rate = 0.0 + config.model.transformer.mlp_dim = 3072 + config.model.transformer.num_heads = 12 + config.model.transformer.attention_dropout_rate = 0.0 + + num_examples = 2 + num_classes = 1000 + inputs = jnp.ones([num_examples, 224, 224, 3], jnp.float32) + + if DEBUG ==1: + model = ub.models.vision_transformer(num_classes=num_classes, **config.model) + + key = jax.random.PRNGKey(0) + variables = model.init(key, inputs, train=False) + + param_count = sum(p.size for p in jax.tree_flatten(variables)[0]) + print(param_count) + getList('variables', variables) + + logits, outputs = model.apply(variables, inputs, train=False) + self.assertEqual(logits.shape, (num_examples, num_classes)) + self.assertEqual( + set(outputs.keys()), + set(('stem', 'transformed', 'head_input', 'pre_logits', 'logits'))) + + # BatchEnsemble parameters. 
+ config.model.transformer.be_layers = (9, 11) + config.model.transformer.ens_size = 3 + config.model.transformer.random_sign_init = 0.5 + config.fast_weight_lr_multiplier = 1.0 + + model = ub.models.PatchTransformerBE(num_classes=num_classes, **config.model) + + key = jax.random.PRNGKey(0) + variables = model.init(key, inputs, train=False) + + param_count = sum(p.size for p in jax.tree_flatten(variables)[0]) + print(param_count) + getList('variables', variables) + + logits, outputs = model.apply(variables, inputs, train=False) + self.assertEqual(logits.shape, (num_examples * config.model.transformer.ens_size, num_classes)) + + self.assertEqual( + set(outputs.keys()), set(('pre_logits',))) From 4135b4998230cf4a7c594137d0d9bc43ba7a0dc2 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Fri, 11 Feb 2022 16:18:34 -0500 Subject: [PATCH 062/150] update config file for ensemble run --- experimental/cityscapes/run_ensemble2.sh | 76 ++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100755 experimental/cityscapes/run_ensemble2.sh diff --git a/experimental/cityscapes/run_ensemble2.sh b/experimental/cityscapes/run_ensemble2.sh new file mode 100755 index 000000000..52eabea6f --- /dev/null +++ b/experimental/cityscapes/run_ensemble2.sh @@ -0,0 +1,76 @@ +#!/bin/bash + +# train segmenter model on cityscapes using different pretrained backbones for different splits + +function get_config() +{ + local config_file_name="experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic.py" + echo "$config_file_name" +} + +function get_pretrained_backbone_path() +{ + local checkpoint_path="gs://ub-checkpoints/ImageNet21k_ViT-L32/$1/checkpoint.npz" + echo "$checkpoint_path" +} + +#base_output_dir="outputs/ensemble" +base_output_dir="gs://ub-ekb/segmenter/cityscapes/run_splits_vitl32/checkpoints" + +declare CITYSCAPES_TRAIN_SIZE=( + ["1"]="29" + ["10"]="298" + ["25"]="744" + ["50"]="1488" + ["75"]="2231" + ["100"]="2975" + ) + +# Debug on Mac OS X platform 
+use_gpu=False +if [ "$(uname)" = "Darwin" ] ; then +tpu=False +num_cores=0 +batch_size=1 +elif [ "$(uname)" = "Linux" ]; then +tpu='local' +num_cores=8 +batch_size=8 +fi +for num_training_epochs in 50 #30 50 150 +do +for lr in "0.0001" # "0.03" "0.01" "0.003" "0.001" +do +for rng_seed in 0 1 2 +do +for model_type in "deterministic" +do +for split in 100 +do + config_file=$(get_config $model_type) + learning_rate=$( echo "$lr" | bc ) + run_name="${model_type}_split${split}_seed${rng_seed}_lr${learning_rate}_step${num_training_epochs}" + output_dir="${base_output_dir}/${run_name}" + train_split="train[:${split}%]" + num_train_examples=${CITYSCAPES_TRAIN_SIZE[$split]} + pretrained_backbone=$(get_pretrained_backbone_path $rng_seed) + python deterministic.py \ + --output_dir=${output_dir} \ + --num_cores=$num_cores \ + --use_gpu=$use_gpu \ + --config=${config_file} \ + --config.rng_seed=${rng_seed} \ + --config.dataset_configs.train_split=${train_split} \ + --config.dataset_configs.number_train_examples_debug=${num_train_examples} \ + --config.batch_size=${batch_size} \ + --tpu=${tpu} \ + --config.lr_configs.base_learning_rate=${learning_rate} \ + --config.num_training_epochs=${num_training_epochs} \ + --config.upstream_model=${model_type} \ + --config.pretrained_backbone_configs.checkpoint_path=${pretrained_backbone} \ + +done +done +done +done +done \ No newline at end of file From 71baf2cb381c20873607c28ffce2a6534a85bbbd Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Fri, 11 Feb 2022 16:19:14 -0500 Subject: [PATCH 063/150] add option to turn on stochastic_depth to vit transformer encoder --- uncertainty_baselines/models/vit.py | 36 +++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/uncertainty_baselines/models/vit.py b/uncertainty_baselines/models/vit.py index 89e98a61a..a3de05893 100644 --- a/uncertainty_baselines/models/vit.py +++ b/uncertainty_baselines/models/vit.py @@ -112,6 +112,9 @@ class 
Encoder1DBlock(nn.Module): attention_dropout_rate: dropout for attention heads. deterministic: bool, deterministic or not (to apply dropout). num_heads: Number of heads in nn.MultiHeadDotProductAttention + stochastic_depth: probability of dropping a layer linearly grows from 0 to + the provided value. + """ mlp_dim: int @@ -119,6 +122,26 @@ class Encoder1DBlock(nn.Module): dtype: Dtype = jnp.float32 dropout_rate: float = 0.1 attention_dropout_rate: float = 0.1 + stochastic_depth: float = 0.0 + + def get_stochastic_depth_mask(self, x: jnp.ndarray, + deterministic: bool) -> jnp.ndarray: + """Generate the stochastic depth mask in order to apply layer-drop. + + Args: + x: Input tensor. + deterministic: Weather we are in the deterministic mode (e.g inference + time) or not. + + Returns: + Stochastic depth mask. + """ + if not deterministic and self.stochastic_depth: + shape = (x.shape[0],) + (1,) * (x.ndim - 1) + return jax.random.bernoulli( + self.make_rng('dropout'), self.stochastic_depth, shape) + else: + return 0.0 @nn.compact def __call__(self, inputs, *, deterministic): @@ -144,7 +167,7 @@ def __call__(self, inputs, *, deterministic): num_heads=self.num_heads, name='MultiHeadDotProductAttention_1')(x, x) x = nn.Dropout(rate=self.dropout_rate)(x, deterministic=deterministic) - x = x + inputs + x = x * (1.0 - self.get_stochastic_depth_mask(x, deterministic)) + inputs # MLP block. y = nn.LayerNorm(dtype=self.dtype, name='LayerNorm_2')(x) @@ -155,7 +178,7 @@ def __call__(self, inputs, *, deterministic): dropout_rate=self.dropout_rate)( y, deterministic=deterministic) - return x + y + return y * (1.0 - self.get_stochastic_depth_mask(x, deterministic)) + x class Encoder(nn.Module): @@ -167,6 +190,12 @@ class Encoder(nn.Module): num_heads: Number of heads in nn.MultiHeadDotProductAttention dropout_rate: dropout rate. attention_dropout_rate: dropout rate in self attention. + stochastic_depth: probability of dropping a layer linearly grows from 0 to + the provided value. 
Our implementation of stochastic depth follows timm + library, which does per-example layer dropping and uses independent + dropping patterns for each skip-connection. + dtype: Dtype of activations. + """ num_layers: int @@ -174,6 +203,7 @@ class Encoder(nn.Module): num_heads: int dropout_rate: float = 0.1 attention_dropout_rate: float = 0.1 + stochastic_depth: float = 0.0 @nn.compact def __call__(self, inputs, *, train): @@ -200,6 +230,8 @@ def __call__(self, inputs, *, train): mlp_dim=self.mlp_dim, dropout_rate=self.dropout_rate, attention_dropout_rate=self.attention_dropout_rate, + stochastic_depth=(lyr / max(self.num_layers - 1, 1)) * + self.stochastic_depth, name=f'encoderblock_{lyr}', num_heads=self.num_heads)( x, deterministic=not train) From c728128e173bc1520a8b3bd57c1af6ed27cb24e2 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Fri, 11 Feb 2022 16:48:37 -0500 Subject: [PATCH 064/150] update segmenter code so it uses vit model module --- uncertainty_baselines/models/segmenter.py | 233 +--------------------- 1 file changed, 7 insertions(+), 226 deletions(-) diff --git a/uncertainty_baselines/models/segmenter.py b/uncertainty_baselines/models/segmenter.py index 155f52117..8b2e46dad 100644 --- a/uncertainty_baselines/models/segmenter.py +++ b/uncertainty_baselines/models/segmenter.py @@ -17,243 +17,21 @@ Based on scenic library implementation. """ -from typing import Any, Callable, Optional, Tuple +from typing import Any, Tuple import flax.linen as nn import jax import jax.numpy as jnp import ml_collections +from uncertainty_baselines.models import vit + Array = Any PRNGKey = Any Shape = Tuple[int] Dtype = Any -class IdentityLayer(nn.Module): - """Identity layer, convenient for giving a name to an array.""" - - @nn.compact - def __call__(self, x): - return x - - -class AddPositionEmbs(nn.Module): - """Adds (optionally learned) positional embeddings to the inputs. - - Attributes: - posemb_init: positional embedding initializer. 
- """ - # TODO(kellybuchanan): check initialization, - # nn.initializers.normal(stddev=0.02) from BERT. - posemb_init: Callable[[PRNGKey, Shape, Dtype], Array] - - @nn.compact - def __call__(self, inputs): - """Applies AddPositionEmbs module. - - By default this layer uses a fixed sinusoidal embedding table. If a - learned position embedding is desired, pass an initializer to - posemb_init. - - Args: - inputs: Inputs to the layer. - - Returns: - Output tensor with shape `(bs, timesteps, in_dim)`. - """ - # inputs.shape is (batch_size, seq_len, emb_dim). - assert inputs.ndim == 3, ('Number of dimensions should be 3,' - ' but it is: %d' % inputs.ndim) - pos_emb_shape = (1, inputs.shape[1], inputs.shape[2]) - pe = self.param('pos_embedding', self.posemb_init, pos_emb_shape) - return inputs + pe - - -class MlpBlock(nn.Module): - """Transformer MLP / feed-forward block.""" - - mlp_dim: int - dtype: Dtype = jnp.float32 - out_dim: Optional[int] = None - dropout_rate: float = 0.1 - kernel_init: Callable[[PRNGKey, Shape, Dtype], - Array] = nn.initializers.xavier_uniform() - bias_init: Callable[[PRNGKey, Shape, Dtype], - Array] = nn.initializers.normal(stddev=1e-6) - use_bias: bool = True - precision: Optional[jax.lax.Precision] = None - activation_fn: Callable[[jnp.ndarray], jnp.ndarray] = nn.gelu - - @nn.compact - def __call__(self, inputs, *, deterministic): - """Applies Transformer MlpBlock module.""" - actual_out_dim = inputs.shape[-1] if self.out_dim is None else self.out_dim - x = nn.Dense( - features=self.mlp_dim, - dtype=self.dtype, - use_bias=self.use_bias, - kernel_init=self.kernel_init, - bias_init=self.bias_init, - precision=self.precision)( # pytype: disable=wrong-arg-types - inputs) - x = self.activation_fn(x) - x = nn.Dropout(rate=self.dropout_rate)(x, deterministic=deterministic) - output = nn.Dense( - features=actual_out_dim, - dtype=self.dtype, - kernel_init=self.kernel_init, - bias_init=self.bias_init, - use_bias=self.use_bias, - 
precision=self.precision)( # pytype: disable=wrong-arg-types - x) - output = nn.Dropout( - rate=self.dropout_rate)( - output, deterministic=deterministic) - return output - - -class Encoder1DBlock(nn.Module): - """Transformer encoder layer. - - Attributes: - inputs: input data. - mlp_dim: dimension of the mlp on top of attention block. - dtype: the dtype of the computation (default: float32). - dropout_rate: dropout rate. - attention_dropout_rate: dropout for attention heads. - deterministic: bool, deterministic or not (to apply dropout). - num_heads: Number of heads in nn.MultiHeadDotProductAttention - stochastic_depth: probability of dropping a layer linearly grows from 0 to - the provided value. - - """ - - mlp_dim: int - num_heads: int - dtype: Dtype = jnp.float32 - dropout_rate: float = 0.1 - attention_dropout_rate: float = 0.1 - stochastic_depth: float = 0.0 - - def get_stochastic_depth_mask(self, x: jnp.ndarray, - deterministic: bool) -> jnp.ndarray: - """Generate the stochastic depth mask in order to apply layer-drop. - - Args: - x: Input tensor. - deterministic: Weather we are in the deterministic mode (e.g inference - time) or not. - - Returns: - Stochastic depth mask. - """ - if not deterministic and self.stochastic_depth: - shape = (x.shape[0],) + (1,) * (x.ndim - 1) - return jax.random.bernoulli( - self.make_rng('dropout'), self.stochastic_depth, shape) - else: - return 0.0 - - @nn.compact - def __call__(self, inputs, *, deterministic): - """Applies Encoder1DBlock module. - - Args: - inputs: Inputs to the layer. - deterministic: Dropout will not be applied when set to true. - - Returns: - output after transformer encoder block. - """ - - # Attention block. 
- assert inputs.ndim == 3, f'Expected (batch, seq, hidden) got {inputs.shape}' - x = nn.LayerNorm(dtype=self.dtype, name='LayerNorm_0')(inputs) - x = nn.MultiHeadDotProductAttention( - dtype=self.dtype, - kernel_init=nn.initializers.xavier_uniform(), - broadcast_dropout=False, - deterministic=deterministic, - dropout_rate=self.attention_dropout_rate, - num_heads=self.num_heads, - name='MultiHeadDotProductAttention_1')(x, x) - x = nn.Dropout(rate=self.dropout_rate)(x, deterministic=deterministic) - x = x * (1.0 - self.get_stochastic_depth_mask(x, deterministic)) + inputs - - # MLP block. - y = nn.LayerNorm(dtype=self.dtype, name='LayerNorm_2')(x) - y = MlpBlock( - mlp_dim=self.mlp_dim, - dtype=self.dtype, - name='MlpBlock_3', - dropout_rate=self.dropout_rate, - activation_fn=nn.gelu, - kernel_init=nn.initializers.xavier_uniform(), - bias_init=nn.initializers.normal(stddev=1e-6))( - y, deterministic=deterministic) - - return y * (1.0 - self.get_stochastic_depth_mask(x, deterministic)) + x - - -class Encoder(nn.Module): - """Transformer Model Encoder for sequence to sequence translation. - - Attributes: - num_layers: number of layers - mlp_dim: dimension of the mlp on top of attention block - num_heads: Number of heads in nn.MultiHeadDotProductAttention - dropout_rate: dropout rate. - attention_dropout_rate: dropout rate in self attention. - stochastic_depth: probability of dropping a layer linearly grows from 0 to - the provided value. Our implementation of stochastic depth follows timm - library, which does per-example layer dropping and uses independent - dropping patterns for each skip-connection. - dtype: Dtype of activations. - """ - - num_layers: int - mlp_dim: int - num_heads: int - dropout_rate: float = 0.1 - attention_dropout_rate: float = 0.1 - stochastic_depth: float = 0.0 - dtype: Any = jnp.float32 - - @nn.compact - def __call__(self, inputs: jnp.ndarray, *, train: bool = False): - """Applies Transformer model on the inputs. 
- - Args: - inputs: Inputs to the layer. - train: Set to `True` when training. - - Returns: - output of a transformer encoder. - """ - assert inputs.ndim == 3 # (batch, len, emb) - x = AddPositionEmbs( - posemb_init=nn.initializers.normal(stddev=0.02), # from BERT. - name='posembed_input')( - inputs) - x = nn.Dropout(rate=self.dropout_rate)(x, deterministic=not train) - - # Input Encoder - for lyr in range(self.num_layers): - x = Encoder1DBlock( - mlp_dim=self.mlp_dim, - dropout_rate=self.dropout_rate, - attention_dropout_rate=self.attention_dropout_rate, - stochastic_depth=(lyr / max(self.num_layers - 1, 1)) * - self.stochastic_depth, - name=f'encoderblock_{lyr}', - num_heads=self.num_heads)( - x, deterministic=not train) - encoded = nn.LayerNorm(name='encoder_norm')(x) - - return encoded - - class ViTBackbone(nn.Module): """Vision Transformer model backbone (everything except the head). @@ -268,6 +46,7 @@ class ViTBackbone(nn.Module): dropout_rate: float = 0.1 attention_dropout_rate: float = 0.1 classifier: str = 'gap' + stochastic_depth: float = 0.0 @nn.compact def __call__(self, inputs, *, train: bool): @@ -299,12 +78,13 @@ def __call__(self, inputs, *, train: bool): cls = jnp.tile(cls, [n, 1, 1]) x = jnp.concatenate([cls, x], axis=1) - x = Encoder(name='Transformer', + x = vit.Encoder(name='Transformer', mlp_dim=self.mlp_dim, num_layers=self.num_layers, num_heads=self.num_heads, dropout_rate=self.dropout_rate, attention_dropout_rate=self.attention_dropout_rate, + stochastic_depth=self.stochastic_depth, )(x, train=train) out['transformed'] = x @@ -338,6 +118,7 @@ def __call__(self, x: jnp.ndarray, *, train: bool, debug: bool = False): dropout_rate=self.backbone_configs.dropout_rate, attention_dropout_rate=self.backbone_configs.attention_dropout_rate, classifier=self.backbone_configs.classifier, + stochastic_depth=self.backbone_configs.get('stochastic_depth', 0), name='backbone')( x, train=train) else: From 34a61f1f4955eded3ad0b8c091a3aa838b75bfc9 Mon Sep 17 
00:00:00 2001 From: Kelly Buchanan Date: Fri, 11 Feb 2022 17:14:04 -0500 Subject: [PATCH 065/150] add stochastic layer to batch ensemble model --- .../models/vit_batchensemble.py | 32 +++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/uncertainty_baselines/models/vit_batchensemble.py b/uncertainty_baselines/models/vit_batchensemble.py index e8575026c..8e73195e6 100644 --- a/uncertainty_baselines/models/vit_batchensemble.py +++ b/uncertainty_baselines/models/vit_batchensemble.py @@ -102,6 +102,26 @@ class Encoder1DBlock(nn.Module): dtype: Optional[DType] = None dropout_rate: float = 0.0 attention_dropout_rate: float = 0.0 + stochastic_depth: float = 0.0 + + def get_stochastic_depth_mask(self, x: jnp.ndarray, + deterministic: bool) -> jnp.ndarray: + """Generate the stochastic depth mask in order to apply layer-drop. + + Args: + x: Input tensor. + deterministic: Weather we are in the deterministic mode (e.g inference + time) or not. + + Returns: + Stochastic depth mask. + """ + if not deterministic and self.stochastic_depth: + shape = (x.shape[0],) + (1,) * (x.ndim - 1) + return jax.random.bernoulli( + self.make_rng('dropout'), self.stochastic_depth, shape) + else: + return 0.0 @nn.compact def __call__(self, @@ -121,13 +141,13 @@ def __call__(self, num_heads=self.num_heads, dropout_rate=self.attention_dropout_rate)(x, x) x = nn.Dropout(rate=self.dropout_rate)(x, deterministic=deterministic) - x = x + inputs + x = x * (1.0 - self.get_stochastic_depth_mask(x, deterministic)) + inputs # MLP block. y = nn.LayerNorm(dtype=self.dtype, name="LayerNorm_2")(x) y = self.mlp_class(name="MlpBlock_3")(y, deterministic=deterministic) - return x + y + return y * (1.0 - self.get_stochastic_depth_mask(x, deterministic)) + x class BatchEnsembleEncoder(nn.Module): @@ -146,6 +166,11 @@ class BatchEnsembleEncoder(nn.Module): train: True if the module is used for training. be_layers: Sequence of layers where BE MLPs are included. 
If None, use BE MLP blocks in every other layer (1, 3, 5, ...). First layer is 0. + stochastic_depth: probability of dropping a layer linearly grows from 0 to + the provided value. Our implementation of stochastic depth follows timm + library, which does per-example layer dropping and uses independent + dropping patterns for each skip-connection. + """ num_layers: int mlp_dim: int @@ -157,6 +182,7 @@ class BatchEnsembleEncoder(nn.Module): attention_dropout_rate: float = 0.0 train: Optional[bool] = None be_layers: Optional[Sequence[int]] = None + stochastic_depth: float = 0.0 @nn.compact def __call__(self, @@ -203,6 +229,8 @@ def is_first_be_layer(lyr: int) -> bool: dtype=dtype, dropout_rate=self.dropout_rate, attention_dropout_rate=self.attention_dropout_rate, + stochastic_depth=(lyr / max(self.num_layers - 1, 1)) * + self.stochastic_depth, name=f"encoderblock_{lyr}") if lyr in be_layers: # We need to tile inputs before the first BE layer. From 22af10a57dec22b008cbb3a4e4f98bf08cfe6637 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Fri, 11 Feb 2022 17:15:17 -0500 Subject: [PATCH 066/150] update ensemble config names --- experimental/cityscapes/run_ensemble.sh | 2 +- experimental/cityscapes/run_ensemble2.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/experimental/cityscapes/run_ensemble.sh b/experimental/cityscapes/run_ensemble.sh index 095b59b4a..931e4f5b7 100755 --- a/experimental/cityscapes/run_ensemble.sh +++ b/experimental/cityscapes/run_ensemble.sh @@ -4,7 +4,7 @@ function get_config() { - local config_file_name="experiments/splits_l32/imagenet21k_segmenter_cityscapes_$1.py" + local config_file_name="experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_$1.py" echo "$config_file_name" } diff --git a/experimental/cityscapes/run_ensemble2.sh b/experimental/cityscapes/run_ensemble2.sh index 52eabea6f..e4232ee1d 100755 --- a/experimental/cityscapes/run_ensemble2.sh +++ b/experimental/cityscapes/run_ensemble2.sh @@ -4,7 +4,7 @@ 
function get_config() { - local config_file_name="experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic.py" + local config_file_name="experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.py" echo "$config_file_name" } From 1dda6bd4aece0588053c550d8b6c3a2c8d1e9f0a Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Fri, 11 Feb 2022 18:46:44 -0500 Subject: [PATCH 067/150] add segmenter_be_model --- experimental/cityscapes/README.md | 4 + uncertainty_baselines/models/__init__.py | 10 + uncertainty_baselines/models/segmenter_be.py | 215 ++++++++++++++++++ .../models/segmenter_be_test.py | 80 +++++++ 4 files changed, 309 insertions(+) create mode 100644 uncertainty_baselines/models/segmenter_be.py create mode 100644 uncertainty_baselines/models/segmenter_be_test.py diff --git a/experimental/cityscapes/README.md b/experimental/cityscapes/README.md index decda40a5..35b8cadc4 100644 --- a/experimental/cityscapes/README.md +++ b/experimental/cityscapes/README.md @@ -75,6 +75,10 @@ python -m unittest -v uncertainty_baselines/models/vit_batchensemble_test.py ``` +To debug segmenter BE model run: +``` +python -m unittest -v uncertainty_baselines/models/segmenter_be_test.py +``` diff --git a/uncertainty_baselines/models/__init__.py b/uncertainty_baselines/models/__init__.py index 92fd4682e..cba2a8a16 100644 --- a/uncertainty_baselines/models/__init__.py +++ b/uncertainty_baselines/models/__init__.py @@ -97,6 +97,16 @@ logging.warning('Skipped Segmenter models due to NotFoundError.', exc_info=True) +try: + # Try to import Segmenter models. + from uncertainty_baselines.models.segmenter_be import segmenter_be_transformer +except ImportError: + logging.warning('Skipped Segmenter BE model due to ImportError.', exc_info=True) +except tf.errors.NotFoundError: + logging.warning('Skipped Segmenter BE model due to NotFoundError.', + exc_info=True) + + try: # Try to import models depending on tensorflow_models.official.nlp. 
from uncertainty_baselines.models import bert diff --git a/uncertainty_baselines/models/segmenter_be.py b/uncertainty_baselines/models/segmenter_be.py new file mode 100644 index 000000000..ee09491a9 --- /dev/null +++ b/uncertainty_baselines/models/segmenter_be.py @@ -0,0 +1,215 @@ +# coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Segmenter Vision Transformer (ViT) model. + +Based on scenic library implementation. +""" +from typing import Any, Callable, Optional, Tuple, Sequence,Iterable + +import flax.linen as nn +import jax +import jax.numpy as jnp +import ml_collections +from uncertainty_baselines.models import vit_batchensemble, segmenter +import edward2.jax as ed + +Array = Any +PRNGKey = Any +Shape = Tuple[int] +DType = type(jnp.float32) + +InitializeFn = Callable[[jnp.ndarray, Iterable[int], DType], jnp.ndarray] + +class ViTBackboneBE(nn.Module): + """Vision Transformer model backbone (everything except the head). + + Edited from VisionTransformer. 
+ """ + + mlp_dim: int + num_layers: int + num_heads: int + patches: ml_collections.ConfigDict + hidden_size: int + + ens_size: int + random_sign_init: float + be_layers: Optional[Sequence[int]] = None + + dropout_rate: float = 0.1 + attention_dropout_rate: float = 0.1 + classifier: str = 'gap' + stochastic_depth: float = 0.0 + + @nn.compact + def __call__(self, inputs, *, train: bool): + out = {} + + x = inputs + n, h, w, c = x.shape + + # We can merge s2d+emb into a single conv; it's the same. + x = nn.Conv( + features=self.hidden_size, + kernel_size=self.patches.size, + strides=self.patches.size, + padding='VALID', + name='embedding')( + x) + + # Here, x is a grid of embeddings. + # TODO(dusenberrymw): Switch to self.sow(.). + out['stem'] = x + + # Transformer. + n, h, w, c = x.shape + x = jnp.reshape(x, [n, h * w, c]) + + # If we want to add a class token, add it here. + if self.classifier == 'token': + cls = self.param('cls', nn.initializers.zeros, (1, 1, c)) + cls = jnp.tile(cls, [n, 1, 1]) + x = jnp.concatenate([cls, x], axis=1) + + x, extra_info = vit_batchensemble.BatchEnsembleEncoder( + train=train, + name='Transformer', + mlp_dim=self.mlp_dim, + num_layers=self.num_layers, + num_heads=self.num_heads, + dropout_rate=self.dropout_rate, + attention_dropout_rate=self.attention_dropout_rate, + ens_size=self.ens_size, + random_sign_init=self.random_sign_init, + be_layers=self.be_layers, + stochastic_depth=self.stochastic_depth, + )(x) + + out.update(extra_info) + out['transformed'] = x + + return x, out + + +class SegVitBE(nn.Module): + """Segmentation model with ViT backbone and decoder.""" + + num_classes: int + patches: ml_collections.ConfigDict + backbone_configs: ml_collections.ConfigDict + decoder_configs: ml_collections.ConfigDict + head_kernel_init: InitializeFn = nn.initializers.zeros + + @nn.compact + def __call__(self, x: jnp.ndarray, *, train: bool, debug: bool = False): + input_shape = x.shape + b, h, w, _ = input_shape + + fh, fw = 
self.patches.size + gh, gw = h // fh, w // fw + + if self.backbone_configs.type == 'vit' and self.decoder_configs.type == 'linear': + assert self.backbone_configs.ens_size == 1 + + if self.backbone_configs.type == 'vit': + x, out = segmenter.ViTBackbone( + mlp_dim=self.backbone_configs.mlp_dim, + num_layers=self.backbone_configs.num_layers, + num_heads=self.backbone_configs.num_heads, + patches=self.patches, + hidden_size=self.backbone_configs.hidden_size, + dropout_rate=self.backbone_configs.dropout_rate, + attention_dropout_rate=self.backbone_configs.attention_dropout_rate, + classifier=self.backbone_configs.classifier, + stochastic_depth=self.backbone_configs.get("stochastic_depth", 0), + name='backbone')( + x, train=train) + + elif self.backbone_configs.type == 'vit_be': + x, out = ViTBackboneBE( + mlp_dim=self.backbone_configs.mlp_dim, + num_layers=self.backbone_configs.num_layers, + num_heads=self.backbone_configs.num_heads, + patches=self.patches, + hidden_size=self.backbone_configs.hidden_size, + dropout_rate=self.backbone_configs.dropout_rate, + attention_dropout_rate=self.backbone_configs.attention_dropout_rate, + classifier=self.backbone_configs.classifier, + ens_size=self.backbone_configs.ens_size, + random_sign_init=self.backbone_configs.random_sign_init, + be_layers=self.backbone_configs.be_layers, + stochastic_depth=self.backbone_configs.get("stochastic_depth", 0), + name='backbone')( + x, train=train) + + else: + raise ValueError(f'Unknown backbone: {self.backbone_configs.type}.') + + # (ens_size*n, gh*gw, hidden_size) x.shape + + if self.decoder_configs.type == 'linear': + output_projection = nn.Dense( + self.num_classes, + kernel_init=nn.initializers.zeros, + name='output_projection') + elif self.decoder_configs.type == 'linear_be': + output_projection = ed.nn.DenseBatchEnsemble( + self.num_classes, + self.backbone_configs.ens_size, + activation=None, + alpha_init=ed.nn.utils.make_sign_initializer( + self.backbone_configs.get("random_sign_init")), 
+ gamma_init=ed.nn.utils.make_sign_initializer( + self.backbone_configs.get("random_sign_init")), + kernel_init=self.head_kernel_init, + name="output_projection_be") + else: + raise ValueError( + f'Decoder type {self.decoder_configs.type} is not defined.') + + ens_size = self.backbone_configs.get("ens_size") + + # Linear head only, like Segmenter baseline: + # https://arxiv.org/abs/2105.05633 + x = jnp.reshape(x, [b*ens_size, gh, gw, -1]) + x = output_projection(x) + + # Resize bilinearly: + x = jax.image.resize(x, [b*ens_size, h, w, x.shape[-1]], 'linear') + out['logits'] = x + + new_input_shape = tuple([input_shape[0]*ens_size,] + list(input_shape[1:-1])) + assert new_input_shape == x.shape[:-1], ( + 'BE Input and output shapes do not match: %d vs. %d.', new_input_shape, + x.shape[:-1]) + + return x, out + + +def segmenter_be_transformer(num_classes: int, + patches: Any, + backbone_configs: Any, + decoder_configs: Any + ): + """Builds a Vision Transformer (ViT) model.""" + # TODO(dusenberrymw): Add API docs once config dict in VisionTransformer is + # cleaned up. + return SegVitBE( + num_classes=num_classes, + patches=patches, + backbone_configs=backbone_configs, + decoder_configs=decoder_configs, + ) diff --git a/uncertainty_baselines/models/segmenter_be_test.py b/uncertainty_baselines/models/segmenter_be_test.py new file mode 100644 index 000000000..cf693a24c --- /dev/null +++ b/uncertainty_baselines/models/segmenter_be_test.py @@ -0,0 +1,80 @@ +# coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for the segmenter ViT model.""" +from absl.testing import absltest +from absl.testing import parameterized +import jax +import jax.numpy as jnp +import ml_collections +import uncertainty_baselines as ub + + +class SegVitBETest(parameterized.TestCase): + + @parameterized.parameters( + (2, 2, 1, 12, 1, 'gap', 'vit', 'linear', 1), + (2, 2, 1, 12, 1, 'gap', 'vit_be', 'linear', 3), + (2, 2, 1, 12, 1, 'gap', 'vit_be', 'linear_be', 3), + # (2, 2, 1, 12, 1, 'gap', 'vit', 'linear_be', 1 vs 3), # Not supported + ) + def test_segmenter_transformer(self, num_classes, mlp_dim, num_heads, num_layers, hidden_size, classifier, + encoder_type, decoder_type, ens_size): + # VisionTransformer. + img_h = 224 + img_w = 224 + config = ml_collections.ConfigDict() + + config.num_classes = num_classes + + config.patches = ml_collections.ConfigDict() + config.patches.size = [4, 4] + + config.backbone_configs = ml_collections.ConfigDict() + config.backbone_configs.type = encoder_type + + config.backbone_configs.hidden_size = hidden_size + config.backbone_configs.attention_dropout_rate = 0. + config.backbone_configs.dropout_rate = 0. 
+ config.backbone_configs.mlp_dim = mlp_dim + config.backbone_configs.num_heads = num_heads + config.backbone_configs.num_layers = num_layers + config.backbone_configs.classifier = classifier + + config.decoder_configs = ml_collections.ConfigDict() + config.decoder_configs.type = decoder_type + #config.decoder_configs.ens_size = decoder_ens_size + + # BE params + config.backbone_configs.ens_size = ens_size + config.backbone_configs.random_sign_init = -0.5 + config.backbone_configs.be_layers = (1,) + + num_examples = 2 + inputs = jnp.ones([num_examples, img_h, img_w, 3], jnp.float32) + model = ub.models.segmenter_be_transformer(**config) + key = jax.random.PRNGKey(0) + variables = model.init(key, inputs, train=False) + + logits, outputs = model.apply(variables, inputs, train=False) + + self.assertEqual(logits.shape, (num_examples*ens_size, img_h, img_w, num_classes)) + #self.assertEqual( + # set(outputs.keys()), + # set(('stem', 'transformed', 'logits'))) + + +if __name__ == '__main__': + absltest.main() From c01a8c7ac3a0bfec48eeb97f3d655bd1ef18d518 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Fri, 11 Feb 2022 18:49:22 -0500 Subject: [PATCH 068/150] add note about updating targets --- experimental/cityscapes/custom_segmentation_trainer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/experimental/cityscapes/custom_segmentation_trainer.py b/experimental/cityscapes/custom_segmentation_trainer.py index ad6728b4d..770948f94 100644 --- a/experimental/cityscapes/custom_segmentation_trainer.py +++ b/experimental/cityscapes/custom_segmentation_trainer.py @@ -179,6 +179,11 @@ def eval_step( } (logits, _) = flax_model.apply( variables, batch['inputs'], train=False, mutable=False, debug=debug) + # upscale the targets + + # labels = jnp.tile(labels, (ens_size, 1)) + # https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/batchensemble.py#L391 + # loss_fn = getattr(train_utils, config.get('loss', 'sigmoid_xent')) metrics = 
metrics_fn(logits, batch) confusion_matrix = get_confusion_matrix( @@ -259,7 +264,7 @@ def train( if start_step == 0 and config.get('load_pretrained_backbone', False): # TODO(kellybuchanan): check out partial loader in # https://github.com/google/uncertainty-baselines/commit/083b1dcc52bb1964f8917d15552ece8848d582ae# - + import pdb; pdb.set_trace() bb_checkpoint_path = config.pretrained_backbone_configs.get('checkpoint_path') checkpoint_format = config.pretrained_backbone_configs.get('checkpoint_format', 'ub') # bb_model_cfg_file = config.pretrained_backbone_configs.get('checkpoint_cfg') From 394b2e02c54086dad33512731f01b95f0f530fbe Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 26 Sep 2022 10:02:28 -0400 Subject: [PATCH 069/150] add baseline changes to train segmenter model on ade20k dataset on mac --- experimental/robust_segvit/README.md | 11 ++++++ experimental/robust_segvit/___init__.py | 0 .../configs/ade20k_ind/toy_model.py | 13 +++++++ .../configs/cityscapes/toy_model.py | 14 +++++++- .../custom_segmentation_trainer.py | 4 +-- experimental/robust_segvit/run_toy_mac.sh | 32 +++++++++++++++++ experimental/robust_segvit/run_toy_mac.yaml | 34 +++++++++++++++++++ 7 files changed, 105 insertions(+), 3 deletions(-) create mode 100644 experimental/robust_segvit/___init__.py create mode 100755 experimental/robust_segvit/run_toy_mac.sh create mode 100755 experimental/robust_segvit/run_toy_mac.yaml diff --git a/experimental/robust_segvit/README.md b/experimental/robust_segvit/README.md index 315313b7b..0971231c9 100644 --- a/experimental/robust_segvit/README.md +++ b/experimental/robust_segvit/README.md @@ -14,3 +14,14 @@ We investigate the performance of different reliability methods on image segment [x] configs/cityscapes: contains experiment configurations for the cityscapes dataset.
+ +## Debugging: + +To run the code on cpu, install the dependencies as in: +[x] Copy ananconda environment +[x] Install jaxlib, jax, flax from source +[x] Install scenic from source +[x] Install uncertainty_baselines from source + +## Issues +[] Fails to read segmenter_be model. diff --git a/experimental/robust_segvit/___init__.py b/experimental/robust_segvit/___init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/experimental/robust_segvit/configs/ade20k_ind/toy_model.py b/experimental/robust_segvit/configs/ade20k_ind/toy_model.py index cfc274317..a9d24f741 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/toy_model.py +++ b/experimental/robust_segvit/configs/ade20k_ind/toy_model.py @@ -20,6 +20,8 @@ # pylint: enable=line-too-long import ml_collections +import os +import datetime _CITYSCAPES_FINE_TRAIN_SIZE = 2975 _CITYSCAPES_COARSE_TRAIN_SIZE = 19998 @@ -138,6 +140,17 @@ def get_config(runlocal=''): config.eval_covariate_shift = False config.eval_label_shift = False + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'me' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
+ if runlocal: config.count_flops = False config.batch_size = 8 diff --git a/experimental/robust_segvit/configs/cityscapes/toy_model.py b/experimental/robust_segvit/configs/cityscapes/toy_model.py index 6a5e50e80..7a47ecfd0 100644 --- a/experimental/robust_segvit/configs/cityscapes/toy_model.py +++ b/experimental/robust_segvit/configs/cityscapes/toy_model.py @@ -20,7 +20,8 @@ # pylint: enable=line-too-long import ml_collections - +import os +import datetime batch_size = 128 _CITYSCAPES_TRAIN_SIZE_SPLIT = 146 @@ -118,6 +119,17 @@ def get_config(runlocal=''): config.eval_covariate_shift = True config.eval_label_shift = True + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'me' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
+ if runlocal: config.count_flops = False diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index 086ab1ae9..f6022a71a 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -816,7 +816,7 @@ def train( checkpoint_steps = config.get('checkpoint_steps') or log_eval_steps train_metrics, extra_training_logs = [], [] - train_summary, eval_summary = None, None + train_summary, eval_summary = {}, {} global_metrics_fn = model.get_global_metrics_fn() # pytype: disable=attribute-error global_unc_metrics_fn = model.get_global_unc_metrics_fn() # pytype: disable=attribute-error @@ -1154,6 +1154,7 @@ def evaluate_ood_step( eval_summary: summary evaluation """ del workdir + eval_summary = {} if config.get('eval_covariate_shift', False): @@ -1168,7 +1169,6 @@ def evaluate_ood_step( # We can donate the eval_batch's buffer. ) - eval_summary = None global_metrics_fn = model.get_global_metrics_fn() # pytype: disable=attribute-error global_unc_metrics_fn = model.get_global_unc_metrics_fn() # pytype: disable=attribute-error diff --git a/experimental/robust_segvit/run_toy_mac.sh b/experimental/robust_segvit/run_toy_mac.sh new file mode 100755 index 000000000..4c90abeb1 --- /dev/null +++ b/experimental/robust_segvit/run_toy_mac.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# train toy model using wandb +#wandb sweep run_toy_mac.yaml +# before make sure we can run code vanilla version: + +DATASET='cityscapes' +DATASET='ade20k_ind' +base_output_dir='gs://ub-ekb/segmenter/ade20k_ind/toy_model' + +# Debug on Mac OS X platform +use_gpu=False +if [ "$(uname)" = "Darwin" ] ; then +tpu=False +num_cores=1 +batch_size=5 +elif [ "$(uname)" = "Linux" ]; then +tpu='local' +num_cores=8 +batch_size=8 +fi + +config_file="configs/ade20k_ind/toy_model.py:runlocal" +run_name="toy_model" +output_dir="${base_output_dir}/${run_name}" +python deterministic.py 
\ +--output_dir=${output_dir} \ +--num_cores=$num_cores \ +--use_gpu=$use_gpu \ +--config=${config_file} \ +--config.batch_size=${batch_size} \ +--tpu=${tpu} \ diff --git a/experimental/robust_segvit/run_toy_mac.yaml b/experimental/robust_segvit/run_toy_mac.yaml new file mode 100755 index 000000000..dba4db0bf --- /dev/null +++ b/experimental/robust_segvit/run_toy_mac.yaml @@ -0,0 +1,34 @@ +name: toy_model +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 8 + + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/cityscapes/toy_model.py:runlocal" + - "--output_dir" + - "gs://ub-ekb/segmenter/cityscapes/toy_model" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file From cf6a4afa036a3214d11c10d47bc4fee63ec0fe7f Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 26 Sep 2022 10:48:55 -0400 Subject: [PATCH 070/150] update run_toy_mac to select between city or ade20k --- experimental/robust_segvit/configs/ade20k_ind/toy_model.py | 4 ++-- experimental/robust_segvit/run_toy_mac.sh | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/experimental/robust_segvit/configs/ade20k_ind/toy_model.py b/experimental/robust_segvit/configs/ade20k_ind/toy_model.py index a9d24f741..3e7ad5b5c 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/toy_model.py +++ b/experimental/robust_segvit/configs/ade20k_ind/toy_model.py @@ -137,8 +137,8 @@ def get_config(runlocal=''): config.eval_mode = False config.eval_configs = ml_collections.ConfigDict() config.eval_configs.mode = 'standard' - config.eval_covariate_shift = False - config.eval_label_shift = False + config.eval_covariate_shift = True + config.eval_label_shift = True # wandb.ai configurations. 
config.use_wandb = False diff --git a/experimental/robust_segvit/run_toy_mac.sh b/experimental/robust_segvit/run_toy_mac.sh index 4c90abeb1..b4f5192c6 100755 --- a/experimental/robust_segvit/run_toy_mac.sh +++ b/experimental/robust_segvit/run_toy_mac.sh @@ -5,8 +5,9 @@ # before make sure we can run code vanilla version: DATASET='cityscapes' -DATASET='ade20k_ind' -base_output_dir='gs://ub-ekb/segmenter/ade20k_ind/toy_model' +DATASET='ade20k_ind' # or cityscapes + +base_output_dir="gs://ub-ekb/segmenter/${DATASET}/toy_model" # Debug on Mac OS X platform use_gpu=False @@ -20,7 +21,7 @@ num_cores=8 batch_size=8 fi -config_file="configs/ade20k_ind/toy_model.py:runlocal" +config_file="configs/${DATASET}/toy_model.py:runlocal" run_name="toy_model" output_dir="${base_output_dir}/${run_name}" python deterministic.py \ From fefe26bf4300a626692b46f8b9a93230c5b3271f Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 26 Sep 2022 12:21:42 -0400 Subject: [PATCH 071/150] remove duplicated eval configs and call to numpy call to masked array --- experimental/robust_segvit/configs/ade20k_ind/be_eval.py | 2 -- experimental/robust_segvit/configs/ade20k_ind/het_eval.py | 2 -- .../robust_segvit/configs/ade20k_ind/toy_model.py | 4 ++++ experimental/robust_segvit/configs/cityscapes/be_eval.py | 2 -- experimental/robust_segvit/ood_metrics.py | 8 +++----- 5 files changed, 7 insertions(+), 11 deletions(-) diff --git a/experimental/robust_segvit/configs/ade20k_ind/be_eval.py b/experimental/robust_segvit/configs/ade20k_ind/be_eval.py index f70890536..a392c7c8c 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/be_eval.py +++ b/experimental/robust_segvit/configs/ade20k_ind/be_eval.py @@ -162,8 +162,6 @@ def get_config(runlocal=''): config.eval_mode = True config.eval_configs = ml_collections.ConfigDict() config.eval_configs.mode = 'standard' - config.eval_covariate_shift = True - config.eval_label_shift = True config.model.input_shape = target_size # Eval parameters for robustness 
diff --git a/experimental/robust_segvit/configs/ade20k_ind/het_eval.py b/experimental/robust_segvit/configs/ade20k_ind/het_eval.py index 66f06f8e4..bf3d08962 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/het_eval.py +++ b/experimental/robust_segvit/configs/ade20k_ind/het_eval.py @@ -172,8 +172,6 @@ def get_config(runlocal=''): config.eval_mode = True config.eval_configs = ml_collections.ConfigDict() config.eval_configs.mode = 'standard' - config.eval_covariate_shift = True - config.eval_label_shift = True config.model.input_shape = target_size # Eval parameters for robustness diff --git a/experimental/robust_segvit/configs/ade20k_ind/toy_model.py b/experimental/robust_segvit/configs/ade20k_ind/toy_model.py index 3e7ad5b5c..e7c7caec4 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/toy_model.py +++ b/experimental/robust_segvit/configs/ade20k_ind/toy_model.py @@ -140,6 +140,10 @@ def get_config(runlocal=''): config.eval_covariate_shift = True config.eval_label_shift = True + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + # wandb.ai configurations. 
config.use_wandb = False config.wandb_dir = 'wandb' diff --git a/experimental/robust_segvit/configs/cityscapes/be_eval.py b/experimental/robust_segvit/configs/cityscapes/be_eval.py index 38f30d6b0..a15182949 100644 --- a/experimental/robust_segvit/configs/cityscapes/be_eval.py +++ b/experimental/robust_segvit/configs/cityscapes/be_eval.py @@ -143,8 +143,6 @@ def get_config(runlocal=''): config.eval_configs = ml_collections.ConfigDict() config.eval_configs.mode = 'segmm' config.eval_configs.window_stride = 512 - config.eval_covariate_shift = True - config.eval_label_shift = True config.model.input_shape = target_size # Eval parameters for robustness diff --git a/experimental/robust_segvit/ood_metrics.py b/experimental/robust_segvit/ood_metrics.py index 36e3b2ce5..a1fd0c21b 100644 --- a/experimental/robust_segvit/ood_metrics.py +++ b/experimental/robust_segvit/ood_metrics.py @@ -148,11 +148,9 @@ def get_ood_metrics( # the weights per entry are 1 if it should be included during computation # and 0 otherwise. - # the masked array makes any entry with value 1 as invalid. 
- y_true_masked = np.ma.masked_array(y_true, mask=1-weights) - ood_score_masked = np.ma.masked_array(ood_score, mask=1-weights) + y_true_masked = y_true[weights == 1] + ood_score_masked = ood_score[weights == 1] - metrics = compute_ood_metrics(y_true_masked.flatten(), - ood_score_masked.flatten()) + metrics = compute_ood_metrics(y_true_masked.flatten(), ood_score_masked.flatten()) return metrics From 9aae57cce41f5e4e767635299b56f5413007d832 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 26 Sep 2022 18:03:15 -0400 Subject: [PATCH 072/150] update default config for toy_model --- experimental/robust_segvit/configs/ade20k_ind/toy_model.py | 2 +- experimental/robust_segvit/configs/cityscapes/toy_model.py | 6 +++++- experimental/robust_segvit/run_toy_mac.sh | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/experimental/robust_segvit/configs/ade20k_ind/toy_model.py b/experimental/robust_segvit/configs/ade20k_ind/toy_model.py index e7c7caec4..55dbdfdd7 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/toy_model.py +++ b/experimental/robust_segvit/configs/ade20k_ind/toy_model.py @@ -148,7 +148,7 @@ def get_config(runlocal=''): config.use_wandb = False config.wandb_dir = 'wandb' config.wandb_project = 'rdl-debug' - config.wandb_entity = 'me' + config.wandb_entity = 'ekellbuch' config.wandb_exp_name = None # Give experiment a name. 
config.wandb_exp_name = ( os.path.splitext(os.path.basename(__file__))[0] + '_' + diff --git a/experimental/robust_segvit/configs/cityscapes/toy_model.py b/experimental/robust_segvit/configs/cityscapes/toy_model.py index 7a47ecfd0..5fc8107c1 100644 --- a/experimental/robust_segvit/configs/cityscapes/toy_model.py +++ b/experimental/robust_segvit/configs/cityscapes/toy_model.py @@ -119,11 +119,15 @@ def get_config(runlocal=''): config.eval_covariate_shift = True config.eval_label_shift = True + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + # wandb.ai configurations. config.use_wandb = False config.wandb_dir = 'wandb' config.wandb_project = 'rdl-debug' - config.wandb_entity = 'me' + config.wandb_entity = 'ekellbuch' config.wandb_exp_name = None # Give experiment a name. config.wandb_exp_name = ( os.path.splitext(os.path.basename(__file__))[0] + '_' + diff --git a/experimental/robust_segvit/run_toy_mac.sh b/experimental/robust_segvit/run_toy_mac.sh index b4f5192c6..675141b3f 100755 --- a/experimental/robust_segvit/run_toy_mac.sh +++ b/experimental/robust_segvit/run_toy_mac.sh @@ -4,8 +4,8 @@ #wandb sweep run_toy_mac.yaml # before make sure we can run code vanilla version: -DATASET='cityscapes' DATASET='ade20k_ind' # or cityscapes +DATASET='cityscapes' base_output_dir="gs://ub-ekb/segmenter/${DATASET}/toy_model" From 3ff6f861cf3b873a7ca908f28226cb139eb96a92 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 26 Sep 2022 19:44:48 -0400 Subject: [PATCH 073/150] add street_hazards to custom_segmentation_trainer --- .../custom_segmentation_trainer.py | 129 ++++++++++++++---- experimental/robust_segvit/run_toy_mac.sh | 1 + 2 files changed, 104 insertions(+), 26 deletions(-) diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index f6022a71a..c57705b25 100644 --- 
a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -1191,20 +1191,22 @@ def evaluate_ood_step( ood_dataset = 'ade20k_ind_c' else: logging.info('OOD Covariate shift dataset is not implemented') + ood_dataset = None - eval_summary = eval_ood_covariate[ood_dataset]( - train_state=train_state, - config=config, - rng=rng, - eval_step_pmapped=eval_step_pmapped, - writer=writer, - lead_host=lead_host, - global_metrics_fn=global_metrics_fn, - global_unc_metrics_fn=global_unc_metrics_fn, - ) + if ood_dataset: + eval_summary = eval_ood_covariate[ood_dataset]( + train_state=train_state, + config=config, + rng=rng, + eval_step_pmapped=eval_step_pmapped, + writer=writer, + lead_host=lead_host, + global_metrics_fn=global_metrics_fn, + global_unc_metrics_fn=global_unc_metrics_fn, + ) - # Wait until computations are done before exiting. - jax.random.normal(jax.random.PRNGKey(0), ()).block_until_ready() + # Wait until computations are done before exiting. + jax.random.normal(jax.random.PRNGKey(0), ()).block_until_ready() # ---------------------------------------------------------------------------- if config.get('eval_label_shift', False): @@ -1221,7 +1223,8 @@ def evaluate_ood_step( eval_label_shift = { 'fishyscapes': evaluate_fishyscapes, - 'ade20k_ood_open': evaluate_ade20k_ood_open + 'ade20k_ood_open': evaluate_ade20k_ood_open, + 'street_hazards_ood_open': evaluate_street_hazards_ood_open, } # The form of the ind dataset name depends on the source of the data. 
@@ -1234,25 +1237,28 @@ def evaluate_ood_step( if any('cityscapes' in ind_name for ind_name in ind_names): logging.info('Loading Fishyscapes...') ood_dataset = 'fishyscapes' - - if any('ade20k' in ind_name for ind_name in ind_names): + elif any('ade20k' in ind_name for ind_name in ind_names): logging.info('Loading ADE20k OOD OPEN...') ood_dataset = 'ade20k_ood_open' - + elif any('street_hazards' in ind_name for ind_name in ind_names): + logging.info('Loading StreetHazards OPEN...') + ood_dataset = 'street_hazards_ood_open' else: logging.info('OOD Label shift dataset is not implemented') + ood_dataset = None - eval_summary = eval_label_shift[ood_dataset]( - train_state=train_state, - config=config, - rng=rng, - eval_step_pmapped=eval_step_ood_pmapped, - writer=writer, - lead_host=lead_host, - ) + if ood_dataset: + eval_summary = eval_label_shift[ood_dataset]( + train_state=train_state, + config=config, + rng=rng, + eval_step_pmapped=eval_step_ood_pmapped, + writer=writer, + lead_host=lead_host, + ) - # Wait until computations are done before exiting. - jax.random.normal(jax.random.PRNGKey(0), ()).block_until_ready() + # Wait until computations are done before exiting. + jax.random.normal(jax.random.PRNGKey(0), ()).block_until_ready() return eval_summary @@ -1571,3 +1577,74 @@ def evaluate_ade20k_corrupted( writer.write_scalars(0, avg_corrupted_metrics) writer.flush() return eval_summary + + +def evaluate_street_hazards_ood_open( + train_state: train_utils.TrainState, + config: ml_collections.ConfigDict, + rng: Any, + eval_step_pmapped: Any, + writer: metric_writers.MetricWriter, + lead_host: Any, +) -> Dict[str, Any]: + """Evaluate StreetHazards OOD dataset. + + Args: + train_state: train state. + config: experiment configuration. + rng: jax rng. + eval_step_pmapped: eval state + writer: CLU metrics writer instance. + lead_host: Evaluate global metrics on one of the hosts (lead_host) given + intermediate values collected from all hosts. 
+ + Returns: + eval_summary: summary evaluation + """ + # set resource limit to debug in mac osx + # (see https://github.com/tensorflow/datasets/issues/1441) + if jax.process_index() == 0 and sys.platform == 'darwin': + low, high = resource.getrlimit(resource.RLIMIT_NOFILE) + resource.setrlimit(resource.RLIMIT_NOFILE, (low, high)) + + # update config: + ood_config = ml_collections.ConfigDict() + ood_config.update(**config) + ood_config.update({'dataset_name': 'robust_segvit_segmentation'}) + + device_count = jax.device_count() + prefix = 'street_hazards_open' + + with ood_config.unlocked(): + ood_config.dataset_configs.name = 'street_hazards_open' + ood_config.batch_size = device_count + + data_rng, rng = jax.random.split(rng) + dataset = train_utils.get_dataset(ood_config, data_rng) + dataset.meta_data['prefix'] = prefix + + eval_summary = evaluate_ood( + train_state=train_state, + dataset=dataset, + config=ood_config, + step=0, + eval_step_pmapped=eval_step_pmapped, + writer=writer, + lead_host=lead_host, + prefix=dataset.meta_data['prefix'], + **config.get('eval_robustness_configs', {}), + ) + + # append name to metrics + key_separator = '_' + avg_open_set_metrics = { + key_separator.join((prefix, key)): val + for key, val in eval_summary.items() + } + # update metrics + eval_summary.update(avg_open_set_metrics) + writer.write_scalars(0, avg_open_set_metrics) + writer.flush() + + return eval_summary + diff --git a/experimental/robust_segvit/run_toy_mac.sh b/experimental/robust_segvit/run_toy_mac.sh index 675141b3f..b4a79ae65 100755 --- a/experimental/robust_segvit/run_toy_mac.sh +++ b/experimental/robust_segvit/run_toy_mac.sh @@ -6,6 +6,7 @@ DATASET='ade20k_ind' # or cityscapes DATASET='cityscapes' +DATASET='street_hazards' base_output_dir="gs://ub-ekb/segmenter/${DATASET}/toy_model" From 5a3a90208d9d17f30e66fd6234dd037b1605f2c2 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 26 Sep 2022 20:00:31 -0400 Subject: [PATCH 074/150] add street_hazards 
configurations --- .../configs/street_hazards/deterministic.py | 255 ++++++++++++++++++ .../configs/street_hazards/toy_model.py | 170 ++++++++++++ .../run_deterministic_street_hazards.sh | 34 +++ .../run_deterministic_street_hazards.yaml | 38 +++ 4 files changed, 497 insertions(+) create mode 100644 experimental/robust_segvit/configs/street_hazards/deterministic.py create mode 100644 experimental/robust_segvit/configs/street_hazards/toy_model.py create mode 100755 experimental/robust_segvit/run_deterministic_street_hazards.sh create mode 100755 experimental/robust_segvit/run_deterministic_street_hazards.yaml diff --git a/experimental/robust_segvit/configs/street_hazards/deterministic.py b/experimental/robust_segvit/configs/street_hazards/deterministic.py new file mode 100644 index 000000000..f8936f804 --- /dev/null +++ b/experimental/robust_segvit/configs/street_hazards/deterministic.py @@ -0,0 +1,255 @@ +# coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Train segmenter model on street_hazards. + +Compare performance from deterministic upstream checkpoints. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +import os +import datetime + +_CITYSCAPES_FINE_TRAIN_SIZE = 2975 +_CITYSCAPES_COARSE_TRAIN_SIZE = 19998 + +_ADE20K_TRAIN_SIZE = 20210 +_PASCAL_VOC_TRAIN_SIZE = 10582 +_PASCAL_CONTEXT_TRAIN_SIZE = 4998 +_STREET_HAZARDS_TRAIN_SIZE = 5125 + +TRAIN_SIZES = { + 'cityscapes': _CITYSCAPES_FINE_TRAIN_SIZE, + 'ade20k': _ADE20K_TRAIN_SIZE, + 'ade20k_ind': _ADE20K_TRAIN_SIZE, + 'pascal_voc': _PASCAL_VOC_TRAIN_SIZE, + 'pascal_context': _PASCAL_CONTEXT_TRAIN_SIZE, + 'street_hazards': _STREET_HAZARDS_TRAIN_SIZE + +} + +# Model specs. +LOAD_PRETRAINED_BACKBONE = True +BACKBONE_ORIGIN = 'big_vision' +VIT_SIZE = 'L' +STRIDE = 16 +RESNET_SIZE = None +CLASSIFIER = 'token' +target_size = (1280, 720) +UPSTREAM_TASK = 'i21k+imagenet2012' + + +# Upstream +MODEL_PATHS = { + + # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 + ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): + 'gs://vit_models/imagenet21k%2Bimagenet2012/ViT-L_16.npz', +} + + +MODEL_PATH = MODEL_PATHS[(BACKBONE_ORIGIN, VIT_SIZE, STRIDE, RESNET_SIZE, + CLASSIFIER, UPSTREAM_TASK)] + +if VIT_SIZE == 'B': + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 +elif VIT_SIZE == 'L': + mlp_dim = 4096 + num_heads = 16 + num_layers = 24 + hidden_size = 1024 + +TRAIN_SAMPLES = 32 + + +def get_config(runlocal=''): + """Returns the configuration for ADE20k_ind segmentation.""" + + runlocal = bool(runlocal) + + config = ml_collections.ConfigDict() + config.experiment_name = 'street_hazards_deterministic' + + # Dataset. 
+ config.dataset_name = 'robust_segvit_segmentation' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 + + config.dataset_configs.train_split = 'train' + config.dataset_configs.name = 'street_hazards' + config.dataset_configs.dataset_name = '' # ood name flag to write in eval. + + # Model. + config.model_name = 'segvit' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit' + config.model.backbone.mlp_dim = mlp_dim + config.model.backbone.num_heads = num_heads + config.model.backbone.num_layers = num_layers + config.model.backbone.hidden_size = hidden_size + config.model.backbone.dropout_rate = 0.1 + config.model.backbone.attention_dropout_rate = 0.0 + config.model.backbone.classifier = CLASSIFIER + + # Decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'linear' + + # Training. + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = ml_collections.FieldReference(100) + config.batch_size = 32 + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # Learning rate. + config.num_train_examples = TRAIN_SIZES.get(config.dataset_configs.name) + config.steps_per_epoch = config.get_ref( + 'num_train_examples') // config.get_ref('batch_size') + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') + config.lr_configs.steps_per_cycle = config.get_ref( + 'num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = ml_collections.ConfigDict() + config.pretrained_backbone_configs.checkpoint_format = BACKBONE_ORIGIN + config.pretrained_backbone_configs.checkpoint_path = MODEL_PATH + config.pretrained_backbone_configs.token_init = True + config.pretrained_backbone_configs.classifier = 'token' + config.pretrained_backbone_configs.backbone_type = 'vit' + + # Logging. + config.write_summary = True + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof. + config.checkpoint = True # Do checkpointing. + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = False # Debug mode during training. + config.debug_eval = False # Debug mode during eval. + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # Evaluation. + config.eval_configs = ml_collections.ConfigDict() + config.eval_configs.mode = 'standard' + config.eval_mode = False + config.eval_covariate_shift = True + config.eval_label_shift = True + config.model.input_shape = target_size + + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. 
+ config.use_wandb = True + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. + + if runlocal: + config.count_flops = False + config.dataset_configs.train_target_size = (128, 128) + config.batch_size = 8 + config.num_training_epochs = 5 + config.warmup_steps = 0 + config.dataset_configs.train_split = f'train[:{TRAIN_SAMPLES}]' + config.dataset_configs.validation_split = f'validation[:{TRAIN_SAMPLES}]' + config.num_train_examples = TRAIN_SAMPLES + + return config + + +def checkpoint(hyper, backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task): + """Defines checkpoints for sweep.""" + overwrites = [] + if resnet_size is not None: + raise NotImplementedError('') + else: + overwrites.append( + hyper.sweep('config.model.patches', [{ + 'size': (stride, stride) + }])) + + if vit_size == 'B': + overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [3072])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [12])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [12])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [768])) + elif vit_size == 'L': + overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [4096])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [16])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [24])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [1024])) + else: + raise NotImplementedError('') + + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_format', + [backbone_origin])) + overwrites.append( + 
hyper.sweep('config.pretrained_backbone_configs.checkpoint_path', [ + MODEL_PATHS[(backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task)] + ])) + + return hyper.product(overwrites) + + +def get_sweep(hyper): + """Defines the hyper-parameters sweeps for doing grid search.""" + + learning_rate = hyper.sweep('config.lr_configs.base_learning_rate', + [1e-4, 3e-4, 3e-5, 1e-5]) + + epochs = hyper.sweep('config.num_training_epochs', [100, 50, 200, 250]) + + return hyper.product([learning_rate, epochs]) diff --git a/experimental/robust_segvit/configs/street_hazards/toy_model.py b/experimental/robust_segvit/configs/street_hazards/toy_model.py new file mode 100644 index 000000000..0d251d923 --- /dev/null +++ b/experimental/robust_segvit/configs/street_hazards/toy_model.py @@ -0,0 +1,170 @@ +# coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Train toy model on toy street_hazards dataset calling the robust_segvit codebase. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +import os +import datetime + +_CITYSCAPES_FINE_TRAIN_SIZE = 2975 +_CITYSCAPES_COARSE_TRAIN_SIZE = 19998 + +_ADE20K_TRAIN_SIZE = 20210 +_PASCAL_VOC_TRAIN_SIZE = 10582 +_PASCAL_CONTEXT_TRAIN_SIZE = 4998 +_STREET_HAZARDS_TRAIN_SIZE = 5125 + +TRAIN_SIZES = { + 'cityscapes': _CITYSCAPES_FINE_TRAIN_SIZE, + 'ade20k': _ADE20K_TRAIN_SIZE, + 'ade20k_ind': _ADE20K_TRAIN_SIZE, + 'pascal_voc': _PASCAL_VOC_TRAIN_SIZE, + 'pascal_context': _PASCAL_CONTEXT_TRAIN_SIZE, + 'street_hazards': _STREET_HAZARDS_TRAIN_SIZE +} + +# Model spec. +STRIDE = 4 +mlp_dim = 2 +num_heads = 1 +num_layers = 1 +hidden_size = 1 +target_size = (512, 512) + +TRAIN_SAMPLES = 32 + + +def get_config(runlocal=''): + """Returns the configuration for street_hazards segmentation.""" + + runlocal = bool(runlocal) + + config = ml_collections.ConfigDict() + config.experiment_name = 'street_hazards_segmenter_ind_toy_model' + + # Dataset. + config.dataset_name = 'robust_segvit_segmentation' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 + config.dataset_configs.train_split = 'train' + config.dataset_configs.validation_split = 'validation' + config.dataset_configs.name = 'street_hazards' + config.dataset_configs.dataset_name = '' # ood name flag to write in eval. + + # Model. 
+ config.model_name = 'segvit' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit' + config.model.backbone.mlp_dim = mlp_dim + config.model.backbone.num_heads = num_heads + config.model.backbone.num_layers = num_layers + config.model.backbone.hidden_size = hidden_size + config.model.backbone.dropout_rate = 0.1 + config.model.backbone.attention_dropout_rate = 0.0 + config.model.backbone.classifier = 'token' + + # Decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'linear' + + # Training. + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = ml_collections.FieldReference(2) + config.batch_size = 32 + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # Learning rate. + config.num_train_examples = TRAIN_SIZES.get(config.dataset_configs.name) + config.steps_per_epoch = config.get_ref( + 'num_train_examples') // config.get_ref('batch_size') + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 0 + config.lr_configs.steps_per_cycle = config.get_ref( + 'num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 5e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # init not included + + # Logging. 
+ config.write_summary = True + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof. + config.checkpoint = True # Do checkpointing. + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = False # Debug mode during training. + config.debug_eval = False # Debug mode during eval. + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # Evaluation. + config.eval_mode = False + config.eval_configs = ml_collections.ConfigDict() + config.eval_configs.mode = 'standard' + config.eval_covariate_shift = True + config.eval_label_shift = True + + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
+ + if runlocal: + config.count_flops = False + config.batch_size = 8 + config.dataset_configs.train_split = f'train[:{TRAIN_SAMPLES}]' + config.dataset_configs.validation_split = f'validation[:{TRAIN_SAMPLES}]' + config.num_train_examples = TRAIN_SAMPLES + return config + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/robust_segvit/run_deterministic_street_hazards.sh b/experimental/robust_segvit/run_deterministic_street_hazards.sh new file mode 100755 index 000000000..7791854c3 --- /dev/null +++ b/experimental/robust_segvit/run_deterministic_street_hazards.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# train toy model using wandb +#wandb sweep run_toy_mac.yaml +# before make sure we can run code vanilla version: + +DATASET='ade20k_ind' # or cityscapes +DATASET='cityscapes' +DATASET='street_hazards' + +base_output_dir="gs://ub-ekb/segmenter/${DATASET}/deterministic" + +# Debug on Mac OS X platform +use_gpu=False +if [ "$(uname)" = "Darwin" ] ; then +tpu=False +num_cores=1 +batch_size=5 +elif [ "$(uname)" = "Linux" ]; then +tpu='local' +num_cores=8 +batch_size=8 +fi + +config_file="configs/${DATASET}/deterministic.py" +run_name="deterministic_street_hazards" +output_dir="${base_output_dir}/${run_name}" +python deterministic.py \ +--output_dir=${output_dir} \ +--num_cores=$num_cores \ +--use_gpu=$use_gpu \ +--config=${config_file} \ +--config.batch_size=${batch_size} \ +--tpu=${tpu} \ diff --git a/experimental/robust_segvit/run_deterministic_street_hazards.yaml b/experimental/robust_segvit/run_deterministic_street_hazards.yaml new file mode 100755 index 000000000..182f288e0 --- /dev/null +++ b/experimental/robust_segvit/run_deterministic_street_hazards.yaml @@ -0,0 +1,38 @@ +name: deterministic_street_hazards_hparam +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + 
config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 16 + config.lr_configs.base_learning_rate: + values: [0.0001, 0.00001, 0.0003] + config.num_training_epochs: + values: [50, 100] + + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/street_hazards/deterministic.py" + - "--output_dir" + - "gs://ub-ekb/segmenter/street_hazards/deterministic" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file From e819957bed3be0fd2f807fe5313048adedeadc61 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 26 Sep 2022 20:05:23 -0400 Subject: [PATCH 075/150] remove call to wandb --- .../run_deterministic_street_hazards.sh | 34 ------------------- 1 file changed, 34 deletions(-) delete mode 100755 experimental/robust_segvit/run_deterministic_street_hazards.sh diff --git a/experimental/robust_segvit/run_deterministic_street_hazards.sh b/experimental/robust_segvit/run_deterministic_street_hazards.sh deleted file mode 100755 index 7791854c3..000000000 --- a/experimental/robust_segvit/run_deterministic_street_hazards.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash - -# train toy model using wandb -#wandb sweep run_toy_mac.yaml -# before make sure we can run code vanilla version: - -DATASET='ade20k_ind' # or cityscapes -DATASET='cityscapes' -DATASET='street_hazards' - -base_output_dir="gs://ub-ekb/segmenter/${DATASET}/deterministic" - -# Debug on Mac OS X platform -use_gpu=False -if [ "$(uname)" = "Darwin" ] ; then -tpu=False -num_cores=1 -batch_size=5 -elif [ "$(uname)" = "Linux" ]; then -tpu='local' -num_cores=8 -batch_size=8 -fi - -config_file="configs/${DATASET}/deterministic.py" -run_name="deterministic_street_hazards" -output_dir="${base_output_dir}/${run_name}" -python deterministic.py \ ---output_dir=${output_dir} \ ---num_cores=$num_cores \ ---use_gpu=$use_gpu \ ---config=${config_file} \ ---config.batch_size=${batch_size} \ ---tpu=${tpu} \ From 087af3aee92580301ef35b473011f6bfb1ddef98 Mon 
Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 26 Sep 2022 20:19:47 -0400 Subject: [PATCH 076/150] update bug in checkpoint name --- experimental/robust_segvit/configs/ade20k_ind/be.py | 2 +- experimental/robust_segvit/configs/ade20k_ind/deterministic.py | 2 +- experimental/robust_segvit/configs/ade20k_ind/gp.py | 2 +- experimental/robust_segvit/configs/ade20k_ind/het.py | 2 +- experimental/robust_segvit/configs/cityscapes/be.py | 2 +- experimental/robust_segvit/configs/cityscapes/deterministic.py | 2 +- experimental/robust_segvit/configs/cityscapes/gp.py | 2 +- experimental/robust_segvit/configs/cityscapes/het.py | 2 +- .../robust_segvit/configs/street_hazards/deterministic.py | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/experimental/robust_segvit/configs/ade20k_ind/be.py b/experimental/robust_segvit/configs/ade20k_ind/be.py index 9e149362a..f90d5999a 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/be.py +++ b/experimental/robust_segvit/configs/ade20k_ind/be.py @@ -54,7 +54,7 @@ # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): - 'gs://vit_models/imagenet21k%2Bimagenet2012/ViT-L_16.npz', + 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', } diff --git a/experimental/robust_segvit/configs/ade20k_ind/deterministic.py b/experimental/robust_segvit/configs/ade20k_ind/deterministic.py index 9472fd60b..b306cd3e1 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/deterministic.py +++ b/experimental/robust_segvit/configs/ade20k_ind/deterministic.py @@ -54,7 +54,7 @@ # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): - 'gs://vit_models/imagenet21k%2Bimagenet2012/ViT-L_16.npz', + 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', } diff --git a/experimental/robust_segvit/configs/ade20k_ind/gp.py b/experimental/robust_segvit/configs/ade20k_ind/gp.py index 
fe3c3186d..5d9f516f4 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/gp.py +++ b/experimental/robust_segvit/configs/ade20k_ind/gp.py @@ -54,7 +54,7 @@ # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): - 'gs://vit_models/imagenet21k%2Bimagenet2012/ViT-L_16.npz', + 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', } diff --git a/experimental/robust_segvit/configs/ade20k_ind/het.py b/experimental/robust_segvit/configs/ade20k_ind/het.py index 52ed0f2dd..755bdc016 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/het.py +++ b/experimental/robust_segvit/configs/ade20k_ind/het.py @@ -54,7 +54,7 @@ # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): - 'gs://vit_models/imagenet21k%2Bimagenet2012/ViT-L_16.npz', + 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', } diff --git a/experimental/robust_segvit/configs/cityscapes/be.py b/experimental/robust_segvit/configs/cityscapes/be.py index 6f123e6db..b5b5e6a15 100644 --- a/experimental/robust_segvit/configs/cityscapes/be.py +++ b/experimental/robust_segvit/configs/cityscapes/be.py @@ -42,7 +42,7 @@ # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): - 'gs://vit_models/imagenet21k%2Bimagenet2012/ViT-L_16.npz', + 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', } diff --git a/experimental/robust_segvit/configs/cityscapes/deterministic.py b/experimental/robust_segvit/configs/cityscapes/deterministic.py index e3d249b94..2e361d6d5 100644 --- a/experimental/robust_segvit/configs/cityscapes/deterministic.py +++ b/experimental/robust_segvit/configs/cityscapes/deterministic.py @@ -42,7 +42,7 @@ # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): - 
'gs://vit_models/imagenet21k%2Bimagenet2012/ViT-L_16.npz', + 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', } diff --git a/experimental/robust_segvit/configs/cityscapes/gp.py b/experimental/robust_segvit/configs/cityscapes/gp.py index 29205f705..b7bbb85c8 100644 --- a/experimental/robust_segvit/configs/cityscapes/gp.py +++ b/experimental/robust_segvit/configs/cityscapes/gp.py @@ -42,7 +42,7 @@ # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): - 'gs://vit_models/imagenet21k%2Bimagenet2012/ViT-L_16.npz', + 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', } diff --git a/experimental/robust_segvit/configs/cityscapes/het.py b/experimental/robust_segvit/configs/cityscapes/het.py index 45e263afe..d14a433cd 100644 --- a/experimental/robust_segvit/configs/cityscapes/het.py +++ b/experimental/robust_segvit/configs/cityscapes/het.py @@ -42,7 +42,7 @@ # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): - 'gs://vit_models/imagenet21k%2Bimagenet2012/ViT-L_16.npz', + 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', } diff --git a/experimental/robust_segvit/configs/street_hazards/deterministic.py b/experimental/robust_segvit/configs/street_hazards/deterministic.py index f8936f804..bd864e0eb 100644 --- a/experimental/robust_segvit/configs/street_hazards/deterministic.py +++ b/experimental/robust_segvit/configs/street_hazards/deterministic.py @@ -59,7 +59,7 @@ # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): - 'gs://vit_models/imagenet21k%2Bimagenet2012/ViT-L_16.npz', + 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', } From 01977d2bb53989365aa8b2b79212d16e7bc594ab Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 26 Sep 2022 20:20:37 -0400 Subject: [PATCH 077/150] set wandb default = False --- 
.../robust_segvit/configs/street_hazards/deterministic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/robust_segvit/configs/street_hazards/deterministic.py b/experimental/robust_segvit/configs/street_hazards/deterministic.py index bd864e0eb..21f09e4ce 100644 --- a/experimental/robust_segvit/configs/street_hazards/deterministic.py +++ b/experimental/robust_segvit/configs/street_hazards/deterministic.py @@ -184,7 +184,7 @@ def get_config(runlocal=''): config.eval_robustness_configs.method_name = 'mlogit' # wandb.ai configurations. - config.use_wandb = True + config.use_wandb = False config.wandb_dir = 'wandb' config.wandb_project = 'rdl-debug' config.wandb_entity = 'ekellbuch' From ba6e1e4f8bac4542e6cdb06eb423bf5eb37905ff Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 26 Sep 2022 22:10:38 -0400 Subject: [PATCH 078/150] debug loading gcp checkpoint from vision_transformer --- experimental/robust_segvit/README.md | 4 ++ .../robust_segvit/configs/ade20k_ind/be.py | 5 +- .../configs/ade20k_ind/deterministic.py | 4 +- .../robust_segvit/configs/ade20k_ind/gp.py | 4 +- .../robust_segvit/configs/ade20k_ind/het.py | 4 +- .../robust_segvit/configs/cityscapes/be.py | 4 +- .../configs/cityscapes/deterministic.py | 6 +- .../robust_segvit/configs/cityscapes/gp.py | 4 +- .../robust_segvit/configs/cityscapes/het.py | 4 +- .../configs/street_hazards/deterministic.py | 9 +-- .../custom_segmentation_trainer.py | 19 ++++++- .../robust_segvit/pretrainer_utils.py | 56 +++++++++++++++++++ 12 files changed, 109 insertions(+), 14 deletions(-) diff --git a/experimental/robust_segvit/README.md b/experimental/robust_segvit/README.md index 0971231c9..90e7ada02 100644 --- a/experimental/robust_segvit/README.md +++ b/experimental/robust_segvit/README.md @@ -25,3 +25,7 @@ To run the code on cpu, install the dependencies as in: ## Issues [] Fails to read segmenter_be model. 
+ +## Comments +[x] Update default checkpoint: vit_large_patch16_384 (segmenter model uses this checkpoint) + from https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py \ No newline at end of file diff --git a/experimental/robust_segvit/configs/ade20k_ind/be.py b/experimental/robust_segvit/configs/ade20k_ind/be.py index f90d5999a..075cdf4f3 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/be.py +++ b/experimental/robust_segvit/configs/ade20k_ind/be.py @@ -46,7 +46,7 @@ RESNET_SIZE = None CLASSIFIER = 'token' target_size = (640, 640) -UPSTREAM_TASK = 'i21k+imagenet2012' +UPSTREAM_TASK = 'augreg+i21k+imagenet2012' # Upstream @@ -55,6 +55,9 @@ # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', + ('big_vision', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', + } diff --git a/experimental/robust_segvit/configs/ade20k_ind/deterministic.py b/experimental/robust_segvit/configs/ade20k_ind/deterministic.py index b306cd3e1..6a8d8ac43 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/deterministic.py +++ b/experimental/robust_segvit/configs/ade20k_ind/deterministic.py @@ -46,7 +46,7 @@ RESNET_SIZE = None CLASSIFIER = 'token' target_size = (640, 640) -UPSTREAM_TASK = 'i21k+imagenet2012' +UPSTREAM_TASK = 'augreg+i21k+imagenet2012' # Upstream @@ -55,6 +55,8 @@ # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', + ('big_vision', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', } diff --git 
a/experimental/robust_segvit/configs/ade20k_ind/gp.py b/experimental/robust_segvit/configs/ade20k_ind/gp.py index 5d9f516f4..4e52c889c 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/gp.py +++ b/experimental/robust_segvit/configs/ade20k_ind/gp.py @@ -46,7 +46,7 @@ RESNET_SIZE = None CLASSIFIER = 'token' target_size = (640, 640) -UPSTREAM_TASK = 'i21k+imagenet2012' +UPSTREAM_TASK = 'augreg+i21k+imagenet2012' # Upstream @@ -55,6 +55,8 @@ # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', + ('big_vision', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', } diff --git a/experimental/robust_segvit/configs/ade20k_ind/het.py b/experimental/robust_segvit/configs/ade20k_ind/het.py index 755bdc016..a5c4f41cb 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/het.py +++ b/experimental/robust_segvit/configs/ade20k_ind/het.py @@ -46,7 +46,7 @@ RESNET_SIZE = None CLASSIFIER = 'token' target_size = (640, 640) -UPSTREAM_TASK = 'i21k+imagenet2012' +UPSTREAM_TASK = 'augreg+i21k+imagenet2012' # Upstream @@ -55,6 +55,8 @@ # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', + ('big_vision', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', } diff --git a/experimental/robust_segvit/configs/cityscapes/be.py b/experimental/robust_segvit/configs/cityscapes/be.py index b5b5e6a15..b3021ef13 100644 --- a/experimental/robust_segvit/configs/cityscapes/be.py +++ b/experimental/robust_segvit/configs/cityscapes/be.py @@ -34,7 +34,7 @@ RESNET_SIZE = 
None CLASSIFIER = 'token' target_size = (768, 768) -UPSTREAM_TASK = 'i21k+imagenet2012' +UPSTREAM_TASK = 'augreg+i21k+imagenet2012' # Upstream @@ -43,6 +43,8 @@ # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', + ('big_vision', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', } diff --git a/experimental/robust_segvit/configs/cityscapes/deterministic.py b/experimental/robust_segvit/configs/cityscapes/deterministic.py index 2e361d6d5..38b0c7ec9 100644 --- a/experimental/robust_segvit/configs/cityscapes/deterministic.py +++ b/experimental/robust_segvit/configs/cityscapes/deterministic.py @@ -34,7 +34,7 @@ RESNET_SIZE = None CLASSIFIER = 'token' target_size = (768, 768) -UPSTREAM_TASK = 'i21k+imagenet2012' +UPSTREAM_TASK = 'augreg+i21k+imagenet2012' # Upstream @@ -43,6 +43,8 @@ # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', + ('big_vision', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', } @@ -212,7 +214,7 @@ def get_sweep(hyper): """Defines the hyper-parameters sweeps for doing grid search.""" checkpoints = hyper.chainit([ checkpoint(hyper, 'big_vision', 'L', 16, None, 'token', - 'i21k+imagenet2012'), + 'augreg+i21k+imagenet2012'), ]) epochs = hyper.sweep('config.num_training_epochs', [50, 100, 300]) diff --git a/experimental/robust_segvit/configs/cityscapes/gp.py b/experimental/robust_segvit/configs/cityscapes/gp.py index b7bbb85c8..4ef7143fb 100644 --- a/experimental/robust_segvit/configs/cityscapes/gp.py +++ 
b/experimental/robust_segvit/configs/cityscapes/gp.py @@ -34,7 +34,7 @@ RESNET_SIZE = None CLASSIFIER = 'token' target_size = (768, 768) -UPSTREAM_TASK = 'i21k+imagenet2012' +UPSTREAM_TASK = 'augreg+i21k+imagenet2012' # Upstream @@ -43,6 +43,8 @@ # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', + ('big_vision', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', } diff --git a/experimental/robust_segvit/configs/cityscapes/het.py b/experimental/robust_segvit/configs/cityscapes/het.py index d14a433cd..733e60424 100644 --- a/experimental/robust_segvit/configs/cityscapes/het.py +++ b/experimental/robust_segvit/configs/cityscapes/het.py @@ -34,7 +34,7 @@ RESNET_SIZE = None CLASSIFIER = 'token' target_size = (768, 768) -UPSTREAM_TASK = 'i21k+imagenet2012' +UPSTREAM_TASK = 'augreg+i21k+imagenet2012' # Upstream @@ -43,6 +43,8 @@ # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', + ('big_vision', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', } diff --git a/experimental/robust_segvit/configs/street_hazards/deterministic.py b/experimental/robust_segvit/configs/street_hazards/deterministic.py index 21f09e4ce..836667257 100644 --- a/experimental/robust_segvit/configs/street_hazards/deterministic.py +++ b/experimental/robust_segvit/configs/street_hazards/deterministic.py @@ -45,21 +45,22 @@ # Model specs. 
LOAD_PRETRAINED_BACKBONE = True -BACKBONE_ORIGIN = 'big_vision' +BACKBONE_ORIGIN = 'vision_transformer' VIT_SIZE = 'L' STRIDE = 16 RESNET_SIZE = None CLASSIFIER = 'token' target_size = (1280, 720) -UPSTREAM_TASK = 'i21k+imagenet2012' +UPSTREAM_TASK = 'augreg+i21k+imagenet2012' # Upstream MODEL_PATHS = { - - # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 + # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 with augreg ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', + ('vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', } diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index c57705b25..fb28bae3e 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -46,6 +46,7 @@ from ood_metrics import get_ood_metrics # local file import from experimental.robust_segvit from ood_metrics import get_ood_score # local file import from experimental.robust_segvit from pretrainer_utils import convert_torch_to_jax_checkpoint # local file import from experimental.robust_segvit +from pretrainer_utils import convert_vision_transformer_to_scenic # local file import from experimental.robust_segvit from uncertainty_metrics import get_uncertainty_confusion_matrix # local file import from experimental.robust_segvit import resource @@ -746,6 +747,7 @@ def train( restored_model_cfg = config.get('pretrained_backbone_configs') # Loader from scenic + import pdb; pdb.set_trace() if restored_model_cfg.checkpoint_format in ('ub', 'big_vision', 'scenic'): # load params from checkpoint bb_train_state = pretrain_utils.convert_big_vision_to_scenic_checkpoint( @@ -760,9 +762,24 @@ def 
train( model_prefix_path=['backbone']) # Free unnecessary memory. del bb_train_state + # Loader from scenic + elif restored_model_cfg.checkpoint_format in ('vision_transformer'): + # load params from checkpoint + import pdb; pdb.set_trace() + bb_train_state = convert_vision_transformer_to_scenic(checkpoint_path=restored_model_cfg.checkpoint_path, convert_to_linen=False) + + train_state = model.init_backbone_from_train_state( + train_state, + bb_train_state, + config, + restored_model_cfg, + model_prefix_path=['backbone']) + import pdb; pdb.set_trace() + + # Free unnecessary memory. + del bb_train_state else: raise NotImplementedError('') - elif start_step == 0: logging.info('Not restoring from any pretrained_backbone.') diff --git a/experimental/robust_segvit/pretrainer_utils.py b/experimental/robust_segvit/pretrainer_utils.py index a5c4ffbcc..2e6217349 100644 --- a/experimental/robust_segvit/pretrainer_utils.py +++ b/experimental/robust_segvit/pretrainer_utils.py @@ -22,6 +22,7 @@ import numpy as np from scenic.train_lib_deprecated import train_utils from tensorflow.io import gfile +from flax.training import checkpoints def load_bb_config( @@ -200,3 +201,58 @@ def convert_torch_to_jax_checkpoint( # free memory del restored_params return restored_train_state + + +def convert_vision_transformer_to_scenic( + checkpoint_path: str, + convert_to_linen: bool = True) -> train_utils.TrainState: + """Converts a vision_transformer checkpoint to an scenic train state. + + The model weights come from https://github.com/google-research/vision_transformer. + + Original code: convert_big_vision_to_scenic_checkpoint + from https://github.com/google-research/scenic/ + + Args: + checkpoint_path: Path to checkpoint. + convert_to_linen: Whether to convert to Linen format. + + Returns: + restored_train_state: Scenic train state with model weights, global step + and accumulated training time. 
+ """ + + def unflatten_dict(flattened: Dict[str, Any], + separator: str = '/', + leaf_idx: int = -1) -> Dict[str, Any]: + unflattened = {} + for k, v in flattened.items(): + subtree = unflattened + if leaf_idx != 0: + path = k.split(separator)[:leaf_idx] + else: + path = k.split(separator) + for k2 in path[:-1]: + if k2 not in subtree: + subtree[k2] = {} + subtree = subtree[k2] + subtree[path[-1]] = v + return unflattened + + logging.info('Loading vision_transformer checkpoint from %s', checkpoint_path) + checkpoint_data = np.load(gfile.GFile(checkpoint_path, 'rb')) + restored_params = unflatten_dict(checkpoint_data, separator='/', leaf_idx=0) + + if convert_to_linen: + restored_params = checkpoints.convert_pre_linen(restored_params) + restored_params = dict(restored_params) + + train_state = train_utils.TrainState() + # pytype: disable=wrong-arg-types + restored_train_state = train_state.replace( # pytype: disable=attribute-error + optimizer={"target": restored_params},) + # pytype: enable=wrong-arg-types + + # free memory + del restored_params + return restored_train_state \ No newline at end of file From f6cb15f0c2a2051778609c7e555db3e3242ca301 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 26 Sep 2022 23:26:34 -0400 Subject: [PATCH 079/150] update config to match train_target_size --- experimental/robust_segvit/configs/ade20k_ind/be.py | 7 ++++--- .../robust_segvit/configs/ade20k_ind/deterministic.py | 7 ++++--- experimental/robust_segvit/configs/ade20k_ind/gp.py | 7 ++++--- experimental/robust_segvit/configs/ade20k_ind/het.py | 7 ++++--- experimental/robust_segvit/configs/cityscapes/be.py | 6 +++--- .../robust_segvit/configs/cityscapes/deterministic.py | 8 ++++---- experimental/robust_segvit/configs/cityscapes/gp.py | 6 +++--- experimental/robust_segvit/configs/cityscapes/het.py | 6 +++--- .../robust_segvit/configs/street_hazards/deterministic.py | 3 ++- experimental/robust_segvit/custom_segmentation_trainer.py | 3 --- 10 files changed, 31 
insertions(+), 29 deletions(-) diff --git a/experimental/robust_segvit/configs/ade20k_ind/be.py b/experimental/robust_segvit/configs/ade20k_ind/be.py index 075cdf4f3..5fb5b8451 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/be.py +++ b/experimental/robust_segvit/configs/ade20k_ind/be.py @@ -40,7 +40,7 @@ # Model specs. LOAD_PRETRAINED_BACKBONE = True -BACKBONE_ORIGIN = 'big_vision' +BACKBONE_ORIGIN = 'vision_transformer' VIT_SIZE = 'L' STRIDE = 16 RESNET_SIZE = None @@ -53,9 +53,9 @@ MODEL_PATHS = { # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 - ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): + ('vision_transformer', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', - ('big_vision', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + ('vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', } @@ -186,6 +186,7 @@ def get_config(runlocal=''): if runlocal: config.count_flops = False config.dataset_configs.train_target_size = (128, 128) + config.model.input_shape = config.dataset_configs.train_target_size config.batch_size = 8 config.num_training_epochs = 5 config.warmup_steps = 0 diff --git a/experimental/robust_segvit/configs/ade20k_ind/deterministic.py b/experimental/robust_segvit/configs/ade20k_ind/deterministic.py index 6a8d8ac43..58c0b2269 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/deterministic.py +++ b/experimental/robust_segvit/configs/ade20k_ind/deterministic.py @@ -40,7 +40,7 @@ # Model specs. 
LOAD_PRETRAINED_BACKBONE = True -BACKBONE_ORIGIN = 'big_vision' +BACKBONE_ORIGIN = 'vision_transformer' VIT_SIZE = 'L' STRIDE = 16 RESNET_SIZE = None @@ -53,9 +53,9 @@ MODEL_PATHS = { # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 - ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): + ('vision_transformer', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', - ('big_vision', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + ('vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', } @@ -179,6 +179,7 @@ def get_config(runlocal=''): if runlocal: config.count_flops = False config.dataset_configs.train_target_size = (128, 128) + config.model.input_shape = config.dataset_configs.train_target_size config.batch_size = 8 config.num_training_epochs = 5 config.warmup_steps = 0 diff --git a/experimental/robust_segvit/configs/ade20k_ind/gp.py b/experimental/robust_segvit/configs/ade20k_ind/gp.py index 4e52c889c..7c2945a59 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/gp.py +++ b/experimental/robust_segvit/configs/ade20k_ind/gp.py @@ -40,7 +40,7 @@ # Model specs. 
LOAD_PRETRAINED_BACKBONE = True -BACKBONE_ORIGIN = 'big_vision' +BACKBONE_ORIGIN = 'vision_transformer' VIT_SIZE = 'L' STRIDE = 16 RESNET_SIZE = None @@ -53,9 +53,9 @@ MODEL_PATHS = { # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 - ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): + ('vision_transformer', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', - ('big_vision', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + ('vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', } @@ -192,6 +192,7 @@ def get_config(runlocal=''): if runlocal: config.count_flops = False config.dataset_configs.train_target_size = (128, 128) + config.model.input_shape = config.dataset_configs.train_target_size config.batch_size = 8 config.num_training_epochs = 5 config.warmup_steps = 0 diff --git a/experimental/robust_segvit/configs/ade20k_ind/het.py b/experimental/robust_segvit/configs/ade20k_ind/het.py index a5c4f41cb..123412f83 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/het.py +++ b/experimental/robust_segvit/configs/ade20k_ind/het.py @@ -40,7 +40,7 @@ # Model specs. 
LOAD_PRETRAINED_BACKBONE = True -BACKBONE_ORIGIN = 'big_vision' +BACKBONE_ORIGIN = 'vision_transformer' VIT_SIZE = 'L' STRIDE = 16 RESNET_SIZE = None @@ -53,9 +53,9 @@ MODEL_PATHS = { # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 - ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): + ('vision_transformer', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', - ('big_vision', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + ('vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', } @@ -192,6 +192,7 @@ def get_config(runlocal=''): if runlocal: config.count_flops = False config.dataset_configs.train_target_size = (128, 128) + config.model.input_shape = config.dataset_configs.train_target_size config.batch_size = 8 config.num_training_epochs = 5 config.warmup_steps = 0 diff --git a/experimental/robust_segvit/configs/cityscapes/be.py b/experimental/robust_segvit/configs/cityscapes/be.py index b3021ef13..26cb94206 100644 --- a/experimental/robust_segvit/configs/cityscapes/be.py +++ b/experimental/robust_segvit/configs/cityscapes/be.py @@ -28,7 +28,7 @@ # Model specs. 
LOAD_PRETRAINED_BACKBONE = True -BACKBONE_ORIGIN = 'big_vision' +BACKBONE_ORIGIN = 'vision_transformer' VIT_SIZE = 'L' STRIDE = 16 RESNET_SIZE = None @@ -41,9 +41,9 @@ MODEL_PATHS = { # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 - ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): + ('vision_transformer', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', - ('big_vision', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + ('vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', } diff --git a/experimental/robust_segvit/configs/cityscapes/deterministic.py b/experimental/robust_segvit/configs/cityscapes/deterministic.py index 38b0c7ec9..96282b0f4 100644 --- a/experimental/robust_segvit/configs/cityscapes/deterministic.py +++ b/experimental/robust_segvit/configs/cityscapes/deterministic.py @@ -28,7 +28,7 @@ # Model specs. 
LOAD_PRETRAINED_BACKBONE = True -BACKBONE_ORIGIN = 'big_vision' +BACKBONE_ORIGIN = 'vision_transformer' VIT_SIZE = 'L' STRIDE = 16 RESNET_SIZE = None @@ -41,9 +41,9 @@ MODEL_PATHS = { # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 - ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): + ('vision_transformer', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', - ('big_vision', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + ('vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', } @@ -213,7 +213,7 @@ def checkpoint(hyper, backbone_origin, vit_size, stride, resnet_size, def get_sweep(hyper): """Defines the hyper-parameters sweeps for doing grid search.""" checkpoints = hyper.chainit([ - checkpoint(hyper, 'big_vision', 'L', 16, None, 'token', + checkpoint(hyper, 'vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'), ]) diff --git a/experimental/robust_segvit/configs/cityscapes/gp.py b/experimental/robust_segvit/configs/cityscapes/gp.py index 4ef7143fb..94474d919 100644 --- a/experimental/robust_segvit/configs/cityscapes/gp.py +++ b/experimental/robust_segvit/configs/cityscapes/gp.py @@ -28,7 +28,7 @@ # Model specs. 
LOAD_PRETRAINED_BACKBONE = True -BACKBONE_ORIGIN = 'big_vision' +BACKBONE_ORIGIN = 'vision_transformer' VIT_SIZE = 'L' STRIDE = 16 RESNET_SIZE = None @@ -41,9 +41,9 @@ MODEL_PATHS = { # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 - ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): + ('vision_transformer', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', - ('big_vision', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + ('vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', } diff --git a/experimental/robust_segvit/configs/cityscapes/het.py b/experimental/robust_segvit/configs/cityscapes/het.py index 733e60424..2d4a91b8f 100644 --- a/experimental/robust_segvit/configs/cityscapes/het.py +++ b/experimental/robust_segvit/configs/cityscapes/het.py @@ -28,7 +28,7 @@ # Model specs. 
LOAD_PRETRAINED_BACKBONE = True -BACKBONE_ORIGIN = 'big_vision' +BACKBONE_ORIGIN = 'vision_transformer' VIT_SIZE = 'L' STRIDE = 16 RESNET_SIZE = None @@ -41,9 +41,9 @@ MODEL_PATHS = { # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 - ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): + ('vision_transformer', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', - ('big_vision', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + ('vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', } diff --git a/experimental/robust_segvit/configs/street_hazards/deterministic.py b/experimental/robust_segvit/configs/street_hazards/deterministic.py index 836667257..bd4a5a349 100644 --- a/experimental/robust_segvit/configs/street_hazards/deterministic.py +++ b/experimental/robust_segvit/configs/street_hazards/deterministic.py @@ -57,7 +57,7 @@ # Upstream MODEL_PATHS = { # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 with augreg - ('big_vision', 'L', 16, None, 'token', 'i21k+imagenet2012'): + ('vision_transformer', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', ('vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', @@ -198,6 +198,7 @@ def get_config(runlocal=''): if runlocal: config.count_flops = False config.dataset_configs.train_target_size = (128, 128) + config.model.input_shape = config.dataset_configs.train_target_size config.batch_size = 8 config.num_training_epochs = 5 config.warmup_steps = 0 diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py 
index fb28bae3e..773106675 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -747,7 +747,6 @@ def train( restored_model_cfg = config.get('pretrained_backbone_configs') # Loader from scenic - import pdb; pdb.set_trace() if restored_model_cfg.checkpoint_format in ('ub', 'big_vision', 'scenic'): # load params from checkpoint bb_train_state = pretrain_utils.convert_big_vision_to_scenic_checkpoint( @@ -765,7 +764,6 @@ def train( # Loader from scenic elif restored_model_cfg.checkpoint_format in ('vision_transformer'): # load params from checkpoint - import pdb; pdb.set_trace() bb_train_state = convert_vision_transformer_to_scenic(checkpoint_path=restored_model_cfg.checkpoint_path, convert_to_linen=False) train_state = model.init_backbone_from_train_state( @@ -774,7 +772,6 @@ def train( config, restored_model_cfg, model_prefix_path=['backbone']) - import pdb; pdb.set_trace() # Free unnecessary memory. del bb_train_state From b1b0cb1b6c6b520928e61f8ec5872e04a6d97416 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 26 Sep 2022 23:40:16 -0400 Subject: [PATCH 080/150] add wandb config options to all config files --- .../robust_segvit/configs/ade20k_ind/be.py | 19 ++++++++++++++++++- .../configs/ade20k_ind/deterministic.py | 17 +++++++++++++++++ .../robust_segvit/configs/ade20k_ind/gp.py | 17 +++++++++++++++++ .../robust_segvit/configs/ade20k_ind/het.py | 17 +++++++++++++++++ .../robust_segvit/configs/cityscapes/be.py | 19 +++++++++++++++++++ .../configs/cityscapes/deterministic.py | 18 ++++++++++++++++++ .../robust_segvit/configs/cityscapes/gp.py | 18 ++++++++++++++++++ .../robust_segvit/configs/cityscapes/het.py | 18 ++++++++++++++++++ 8 files changed, 142 insertions(+), 1 deletion(-) diff --git a/experimental/robust_segvit/configs/ade20k_ind/be.py b/experimental/robust_segvit/configs/ade20k_ind/be.py index 5fb5b8451..6edb44651 100644 --- 
a/experimental/robust_segvit/configs/ade20k_ind/be.py +++ b/experimental/robust_segvit/configs/ade20k_ind/be.py @@ -22,6 +22,8 @@ # pylint: enable=line-too-long import ml_collections +import os +import datetime _CITYSCAPES_FINE_TRAIN_SIZE = 2975 _CITYSCAPES_COARSE_TRAIN_SIZE = 19998 @@ -56,7 +58,7 @@ ('vision_transformer', 'L', 16, None, 'token', 'i21k+imagenet2012'): 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', ('vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): - 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', + 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', # pylint: disable=g-long-lambda } @@ -183,6 +185,21 @@ def get_config(runlocal=''): config.eval_label_shift = False config.model.input_shape = target_size + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
+ if runlocal: config.count_flops = False config.dataset_configs.train_target_size = (128, 128) diff --git a/experimental/robust_segvit/configs/ade20k_ind/deterministic.py b/experimental/robust_segvit/configs/ade20k_ind/deterministic.py index 58c0b2269..2025e027c 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/deterministic.py +++ b/experimental/robust_segvit/configs/ade20k_ind/deterministic.py @@ -22,6 +22,8 @@ # pylint: enable=line-too-long import ml_collections +import os +import datetime _CITYSCAPES_FINE_TRAIN_SIZE = 2975 _CITYSCAPES_COARSE_TRAIN_SIZE = 19998 @@ -176,6 +178,21 @@ def get_config(runlocal=''): config.eval_label_shift = False config.model.input_shape = target_size + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
+ if runlocal: config.count_flops = False config.dataset_configs.train_target_size = (128, 128) diff --git a/experimental/robust_segvit/configs/ade20k_ind/gp.py b/experimental/robust_segvit/configs/ade20k_ind/gp.py index 7c2945a59..41eef64ab 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/gp.py +++ b/experimental/robust_segvit/configs/ade20k_ind/gp.py @@ -22,6 +22,8 @@ # pylint: enable=line-too-long import ml_collections +import os +import datetime _CITYSCAPES_FINE_TRAIN_SIZE = 2975 _CITYSCAPES_COARSE_TRAIN_SIZE = 19998 @@ -189,6 +191,21 @@ def get_config(runlocal=''): config.eval_label_shift = False config.model.input_shape = target_size + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
+ if runlocal: config.count_flops = False config.dataset_configs.train_target_size = (128, 128) diff --git a/experimental/robust_segvit/configs/ade20k_ind/het.py b/experimental/robust_segvit/configs/ade20k_ind/het.py index 123412f83..3ec6ff989 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/het.py +++ b/experimental/robust_segvit/configs/ade20k_ind/het.py @@ -22,6 +22,8 @@ # pylint: enable=line-too-long import ml_collections +import os +import datetime _CITYSCAPES_FINE_TRAIN_SIZE = 2975 _CITYSCAPES_COARSE_TRAIN_SIZE = 19998 @@ -189,6 +191,21 @@ def get_config(runlocal=''): config.eval_label_shift = False config.model.input_shape = target_size + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
+ if runlocal: config.count_flops = False config.dataset_configs.train_target_size = (128, 128) diff --git a/experimental/robust_segvit/configs/cityscapes/be.py b/experimental/robust_segvit/configs/cityscapes/be.py index 26cb94206..ed642b603 100644 --- a/experimental/robust_segvit/configs/cityscapes/be.py +++ b/experimental/robust_segvit/configs/cityscapes/be.py @@ -22,6 +22,8 @@ # pylint: enable=line-too-long import ml_collections +import os +import datetime _CITYSCAPES_TRAIN_SIZE = 2975 _CITYSCAPES_TRAIN_SIZE_SPLIT = 146 @@ -159,6 +161,23 @@ def get_config(runlocal=''): config.eval_configs.mode = 'standard' config.eval_covariate_shift = True config.eval_label_shift = True + config.model.input_shape = target_size + + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
+ if runlocal: config.count_flops = False diff --git a/experimental/robust_segvit/configs/cityscapes/deterministic.py b/experimental/robust_segvit/configs/cityscapes/deterministic.py index 96282b0f4..4e922550c 100644 --- a/experimental/robust_segvit/configs/cityscapes/deterministic.py +++ b/experimental/robust_segvit/configs/cityscapes/deterministic.py @@ -22,6 +22,8 @@ # pylint: enable=line-too-long import ml_collections +import os +import datetime _CITYSCAPES_TRAIN_SIZE = 2975 _CITYSCAPES_TRAIN_SIZE_SPLIT = 146 @@ -153,6 +155,22 @@ def get_config(runlocal=''): config.eval_configs.mode = 'standard' config.eval_covariate_shift = True config.eval_label_shift = True + config.model.input_shape = target_size + + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
if runlocal: config.count_flops = False diff --git a/experimental/robust_segvit/configs/cityscapes/gp.py b/experimental/robust_segvit/configs/cityscapes/gp.py index 94474d919..1de85fb22 100644 --- a/experimental/robust_segvit/configs/cityscapes/gp.py +++ b/experimental/robust_segvit/configs/cityscapes/gp.py @@ -22,6 +22,8 @@ # pylint: enable=line-too-long import ml_collections +import os +import datetime _CITYSCAPES_TRAIN_SIZE = 2975 _CITYSCAPES_TRAIN_SIZE_SPLIT = 146 @@ -166,6 +168,22 @@ def get_config(runlocal=''): config.eval_configs.mode = 'standard' config.eval_covariate_shift = True config.eval_label_shift = True + config.model.input_shape = target_size + + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
if runlocal: config.count_flops = False diff --git a/experimental/robust_segvit/configs/cityscapes/het.py b/experimental/robust_segvit/configs/cityscapes/het.py index 2d4a91b8f..064b8877b 100644 --- a/experimental/robust_segvit/configs/cityscapes/het.py +++ b/experimental/robust_segvit/configs/cityscapes/het.py @@ -22,6 +22,8 @@ # pylint: enable=line-too-long import ml_collections +import os +import datetime _CITYSCAPES_TRAIN_SIZE = 2975 _CITYSCAPES_TRAIN_SIZE_SPLIT = 146 @@ -167,6 +169,22 @@ def get_config(runlocal=''): config.eval_configs.mode = 'standard' config.eval_covariate_shift = True config.eval_label_shift = True + config.model.input_shape = target_size + + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
if runlocal: config.count_flops = False From f7aee61322497c62adb4f4c7988e0bdc14e408c0 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 28 Sep 2022 17:26:13 -0400 Subject: [PATCH 081/150] update pavpu calculation based on softmax --- .../custom_segmentation_trainer.py | 5 + .../custom_segmentation_trainer_test.py | 56 +++++++++- .../robust_segvit/uncertainty_metrics.py | 103 +++++++++++++----- .../robust_segvit/uncertainty_metrics_test.py | 92 ---------------- 4 files changed, 137 insertions(+), 119 deletions(-) delete mode 100644 experimental/robust_segvit/uncertainty_metrics_test.py diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index 773106675..11bf0c33d 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -49,6 +49,8 @@ from pretrainer_utils import convert_vision_transformer_to_scenic # local file import from experimental.robust_segvit from uncertainty_metrics import get_uncertainty_confusion_matrix # local file import from experimental.robust_segvit +import h5py +import os import resource import sys @@ -127,6 +129,7 @@ def evaluate(train_state: train_utils.TrainState, global_metrics_fn: Any, global_unc_metrics_fn: Optional[Any], prefix: str = 'valid', + workdir: str = '', ) -> Dict[str, Any]: """Model evaluator. @@ -922,6 +925,7 @@ def train( lead_host=lead_host, global_metrics_fn=global_metrics_fn, global_unc_metrics_fn=global_unc_metrics_fn, + workdir=workdir, ) # check accuracy for early stopping. @@ -1117,6 +1121,7 @@ def eval_ckpt( global_metrics_fn=global_metrics_fn, global_unc_metrics_fn=global_unc_metrics_fn, prefix=prefix, + workdir=workdir, ) # Wait until computations are done before running robustness evaluator. 
diff --git a/experimental/robust_segvit/custom_segmentation_trainer_test.py b/experimental/robust_segvit/custom_segmentation_trainer_test.py index 90b6c7553..41410f059 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer_test.py +++ b/experimental/robust_segvit/custom_segmentation_trainer_test.py @@ -34,7 +34,7 @@ from sklearn import metrics as sk_metrics import tensorflow as tf import custom_segmentation_trainer # local file import from experimental.robust_segvit - +import custom_models class SegmentationTrainerTest(parameterized.TestCase): """Tests the default trainer on single device setup.""" @@ -226,6 +226,60 @@ def test_get_confusion_matrix(self, seed, masked_fraction): self.assertAlmostEqual(metrics_dict['mean_iou'], miou_np, places=4) + @parameterized.parameters([(0, 0.0), (1, 0.01), (2, 0.5), (3, 0.99), (4, 1)]) + def test_unc_confusion_matrix(self, seed, masked_fraction): + """Test computation of mIoU metric.""" + np.random.seed(seed) + + # Create test data: + num_classes = 3 + input_shape = [8, 1, 224, 224] + logits_shape = input_shape + [num_classes] + logits_np = np.random.rand(*logits_shape) + logits = jnp.array(logits_np) + + # when the uncertainty threshold is 100% or = 0 + # all labels are certain, and pavpu is the fraction of patches that are accurate. + uncertainty_th = 0.0 + window_size = 1 + + # Note: We include label -1, which indicates excluded pixels: + label = np.random.randint(0, num_classes, size=input_shape) + label[:4] = np.argmax(logits_np[:4], axis=-1) # Set half to correct. 
+ + batch_np = { + 'label': + label, + 'batch_mask': + (np.random.rand(*input_shape) > masked_fraction) & (label != -1), + } + batch = { + 'label': jnp.array(batch_np['label']), + 'batch_mask': jnp.array(batch_np['batch_mask']), + } + + cm_pmapped = jax.pmap( + functools.partial( + custom_segmentation_trainer.get_uncertainty_confusion_matrix, + uncertainty_th=uncertainty_th, + window_size=window_size, + uncertainty_measure='softmax', + ), axis_name='batch') + unc_confusion_matrix = [ + cm_pmapped(labels=labels, logits=logits_, weights=masks) + for labels, logits_, masks in + zip(batch['label'], logits, batch['batch_mask'])] + unc_confusion_matrix = jax.device_get(jax_utils.unreplicate(unc_confusion_matrix)) + metrics_dict = custom_models.global_unc_metrics_fn( + unc_confusion_matrix) + labels_negative_ignored = np.maximum(batch_np['label'], 0) + y_pred = np.argmax(logits_np, axis=-1) + weights = batch_np['batch_mask'] + accurate = labels_negative_ignored == y_pred + pavpu = np.sum(accurate * weights) / np.sum(weights) + # all labels are certain, pavpu = fraction of patches that are accurate + self.assertAlmostEqual(metrics_dict['pacc_cert'], jnp.nan_to_num(pavpu), places=2) + if __name__ == '__main__': absltest.main() diff --git a/experimental/robust_segvit/uncertainty_metrics.py b/experimental/robust_segvit/uncertainty_metrics.py index 031c1fecb..6327ee3d6 100644 --- a/experimental/robust_segvit/uncertainty_metrics.py +++ b/experimental/robust_segvit/uncertainty_metrics.py @@ -17,9 +17,7 @@ from typing import Optional, Tuple from jax import lax import jax.numpy as jnp -from scenic.model_lib.base_models.model_utils import apply_weights - -# TODO(kellybuchanan): reconcile cases where mask is 0. 
+from scenic.model_lib.layers import nn_ops def calculate_num_patches_binary_maps( @@ -29,7 +27,7 @@ def calculate_num_patches_binary_maps( Args: binary_acc_map : binary accuracy map - binary_unc_map : binary uncertainty map + binary_unc_map : binary uncertainty map (1=certain, 0=uncertain) Returns: metrics to calculate uncertainty scores @@ -37,28 +35,29 @@ def calculate_num_patches_binary_maps( # number of patches that are accurate and certain n_ac = jnp.sum( jnp.logical_and( - jnp.equal(binary_acc_map, 1), jnp.equal(binary_unc_map, 0)), + jnp.equal(binary_acc_map, 1), jnp.equal(binary_unc_map, 1)), axis=(-1, -2)) # number of patches that are inaccurate and certain n_ic = jnp.sum( jnp.logical_and( - jnp.equal(binary_acc_map, 0), jnp.equal(binary_unc_map, 0)), + jnp.equal(binary_acc_map, 0), jnp.equal(binary_unc_map, 1)), axis=(-1, -2)) # number of patches that are inaccurate and uncertain n_iu = jnp.sum( jnp.logical_and( - jnp.equal(binary_acc_map, 0), jnp.equal(binary_unc_map, 1)), + jnp.equal(binary_acc_map, 0), jnp.equal(binary_unc_map, 0)), axis=(-1, -2)) # number of patches that are accurate and uncertain n_au = jnp.sum( jnp.logical_and( - jnp.equal(binary_acc_map, 1), jnp.equal(binary_unc_map, 1)), + jnp.equal(binary_acc_map, 1), jnp.equal(binary_unc_map, 0)), axis=(-1, -2)) unc_confusion_matrix = jnp.stack((n_ac, n_ic, n_iu, n_au), axis=-1) + unc_confusion_matrix = unc_confusion_matrix[jnp.newaxis, ...] # Dummy batch dim. 
return unc_confusion_matrix @@ -88,15 +87,15 @@ def get_pavpu(unc_confusion_matrix): def get_uncertainty_confusion_matrix( logits: jnp.ndarray, labels: jnp.ndarray, + uncertainty_measure: str = 'softmax', + accuracy_measure : str = 'predictive_accuracy', weights: Optional[jnp.ndarray] = None, accuracy_th: Optional[float] = 0.5, uncertainty_th: Optional[float] = 0.5, - window_size: Optional[int] = 2 + window_size: Optional[int] = 2, ) -> Tuple[jnp.ndarray, jnp.ndarray, jnp.ndarray, jnp.ndarray]: """Calculate counts of patches accurate/inacurate and certain/uncertain. - TODO(kellybuchanan): include weights for entropy calculation. - Args: logits: predicted logits labels: true labels @@ -118,24 +117,37 @@ def get_uncertainty_confusion_matrix( preds = jnp.argmax(logits, axis=-1) # calculate binary accuracy map - correct = jnp.equal(preds, targets) + correct = jnp.equal(preds, targets).astype(jnp.float32) - # batch masking - if weights is not None: - correct = apply_weights(correct, weights) + if weights is None: + weights = jnp.ones(correct.shape) - correct = correct.astype(jnp.float32) + weights = weights.astype(jnp.float32) - # A given patch is accurate if its acc > accuracy_threshold - binary_acc_map = reduce_2dmap(correct, window_size, - accuracy_th).astype(jnp.float32) + if accuracy_measure == 'predictive_accuracy': + accuracy_map = correct + else: + raise NotImplementedError('Accuracy measure not implemented.') - # Calculate uncertainty map - entropy = get_entropy_from_logits(logits) + # A given patch is accurate if its acc > accuracy_threshold + binary_acc_map = reduce_2dmap_weighted(accuracy_map, + weights, + window_size=window_size, + threshold=accuracy_th).astype(jnp.float32) + + # Calculate uncertainty map: + if uncertainty_measure == 'softmax': + uncertainty_map = jnp.max(jnp.exp(logits) / jnp.sum(jnp.exp(logits), -1, keepdims=True), -1) + elif uncertainty_measure == 'entropy': + uncertainty_map = get_entropy_from_logits(logits) + else: + raise 
NotImplementedError(f'Uncertainty measure {uncertainty_measure} not implemented.') - # A given patch is uncertain if its uncertainty > uncertainty_th - binary_unc_map = reduce_2dmap(entropy, window_size, - uncertainty_th).astype(jnp.float32) + # A given patch is certain if its uncertainty > uncertainty_th + binary_unc_map = reduce_2dmap_weighted(uncertainty_map, + weights, + window_size=window_size, + threshold=uncertainty_th).astype(jnp.float32) # number of patches that are accurate and certain unc_confusion_matrix = calculate_num_patches_binary_maps( @@ -159,14 +171,14 @@ def reduce_2dmap( """Given a map, apply a 2d spatial strided convolution to avg adjacent values. Args: - array_map: array to be split. + array_map: array to be split. 3-D Tensor; With shape `[batch, in_rows, in_cols]. window_size: size of window. threshold: threshold for binarization. Returns: binary_map: binary map. """ - # Expand dimension to match filter C dimension. + # Expand dimension for dummy depth dimension array_map = jnp.expand_dims(array_map, -1) # Create a kernel @@ -193,6 +205,45 @@ def reduce_2dmap( return binary_map.astype(jnp.int32) +def reduce_2dmap_weighted( + array_map: jnp.ndarray, + weights: jnp.ndarray, + window_size: int = 4, + threshold: float = 0.5, +) -> jnp.ndarray: + """Given a map, apply a pooling operation to avg adjacent values. + + Args: + array_map: array to be split. 3-D Tensor; With shape `[batch, in_rows, in_cols]. + weights: array of weights. 3-D Tensor; With shape `[batch, in_rows, in_cols]. + window_size: size of window. + threshold: threshold for binarization. + data_format: str; The format of the `lhs`. Must be either `'NHWC'` or `'NCHW'`. + + Returns: + binary_map: binary map. 
+ """ + # Expand dimension for dummy feature dimension + array_map = jnp.expand_dims(array_map, -1) + + window_shape = (window_size, window_size) + + outputs = nn_ops.weighted_avg_pool( + array_map, + weights, + window_shape=window_shape, + strides=window_shape, + padding='VALID') + + # Binarize_map according to threshold + binary_map = jnp.greater_equal(outputs, threshold) + + # Squeeze dummy feature dimension + binary_map = jnp.squeeze(binary_map, -1) + + return binary_map.astype(jnp.int32) + + class SegmentationUncertaintyMetrics(object): """Calculate uncertainty scores for image segmentation task.""" diff --git a/experimental/robust_segvit/uncertainty_metrics_test.py b/experimental/robust_segvit/uncertainty_metrics_test.py deleted file mode 100644 index c3a4c2a78..000000000 --- a/experimental/robust_segvit/uncertainty_metrics_test.py +++ /dev/null @@ -1,92 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Tests for uncertainty_metrics.""" - -from absl.testing import absltest -from absl.testing import parameterized -import jax.numpy as jnp -from uncertainty_metrics import reduce_2dmap # local file import from experimental.robust_segvit -from uncertainty_metrics import SegmentationUncertaintyMetrics # local file import from experimental.robust_segvit - - -class UncertaintyMetricsTest(parameterized.TestCase): - - def setUp(self): - super(UncertaintyMetricsTest, self).setUp() - self.targets = jnp.asarray([[[1, 2, 5, 7], [6, 4, 3, 3], [10, 9, 5, 0], - [8, 6, 4, 4]]]) - - self.preds = jnp.asarray([[[1, 2, 4, 7], [5, 6, 3, 3], [10, 9, 4, 0], - [8, 7, 3, 4]]]) - - self.unc_map = jnp.asarray([[[0.1, 0.3, 0.6, 0.3], [0.7, 0.6, 0.2, 0.1], - [0.2, 0.4, 0.5, 0.3], [0.1, 0.7, 0.6, 0.2]]]) - - # create logit map from unc_map by mapping entropy vals (0.1,0.7) - # to a feasible range of logit vals:(4.1, 6.2) - self.logit_map = 4.1 + (0.7 - self.unc_map) * (6.2 - 4.1) / (0.7 - 0.1) - - self.window_size = 2 - self.accuracy_th = 0.5 - self.uncertainty_th = 0.4 - - # true values - self.true_binary_acc_map = jnp.asarray([[[0, 1], [1, 0]]]) - self.true_binary_unc_map = jnp.asarray([[[1, 0], [0, 0]]]) - self.true_p_accurate_certain = jnp.asarray([0.67]) - self.true_p_uncertain_innacurate = jnp.asarray([0.5]) - self.true_pavpu = jnp.asarray([0.75]) - - # construct logits passed as input from unc_map - self.num_classes = 11 - self.img_size = 4 - - true_mask = jnp.arange(self.img_size * self.img_size - ) * self.num_classes + self.preds.flatten() - logits = jnp.zeros((self.img_size * self.img_size * self.num_classes)) - logits = logits.at[true_mask].set(self.logit_map.flatten()) - self.logits = jnp.expand_dims( - logits.reshape((self.img_size, self.img_size, self.num_classes)), 0) - - def test_setup(self): - preds_logits = jnp.argmax(self.logits, -1) - self.assertTrue(jnp.array_equal(self.preds, preds_logits)) - - def test_calculate_pacc_cert(self): - segment_unc = 
SegmentationUncertaintyMetrics( - logits=self.logits, - labels=self.targets, - window_size=self.window_size, - accuracy_th=self.accuracy_th, - uncertainty_th=self.uncertainty_th) - - self.assertEqual(self.true_pavpu, segment_unc.pavpu) - self.assertAlmostEqual(self.true_p_accurate_certain, segment_unc.pacc_cert, - 2) - self.assertAlmostEqual(self.true_p_uncertain_innacurate, - segment_unc.puncert_inacc, 2) - - @parameterized.parameters((1), (2), (3)) - def test_reduce_2dmap(self, batch_size): - array_map = jnp.repeat(jnp.ones((1, 4, 4)), batch_size, axis=0) - true_binary_map = jnp.repeat(jnp.ones((1, 2, 2)), batch_size, axis=0) - binary_map = reduce_2dmap(array_map, self.window_size, self.accuracy_th) - - self.assertTrue(jnp.array_equal(true_binary_map, binary_map)) - - -if __name__ == '__main__': - absltest.main() From ffd92da46f2cde12e1730ee0176f191f00545638 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 28 Sep 2022 22:32:52 -0400 Subject: [PATCH 082/150] add -1*mlogit as an ood metric --- experimental/robust_segvit/ood_metrics.py | 30 ++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/experimental/robust_segvit/ood_metrics.py b/experimental/robust_segvit/ood_metrics.py index a1fd0c21b..2f273b02c 100644 --- a/experimental/robust_segvit/ood_metrics.py +++ b/experimental/robust_segvit/ood_metrics.py @@ -81,7 +81,7 @@ def preprocess_outlier(outlier): def get_ood_score( logits: jnp.ndarray, - method_name: str = 'msp', + method_name: str = 'nmlogit', num_top_k: int = 5, ) -> Dict[str, Any]: """Get OOD score.""" @@ -97,6 +97,9 @@ def get_ood_score( elif method_name == 'mlogit': max_logits = jnp.max(logits, -1) ood_score = 1 - max_logits + elif method_name == 'nmlogit': + max_logits = jnp.max(logits, -1) + ood_score = - 1 * max_logits elif method_name == 'sum_topklogit': ood_score = jax.lax.top_k(logits, num_top_k)[0].sum(-1) elif method_name == '1-sum_topklogit': @@ -108,6 +111,31 @@ def get_ood_score( return ood_score +def 
get_score( + logits: jnp.ndarray, + method_name: str = 'mlogit', + num_top_k: int = 5, + ) -> Dict[str, Any]: + """Get OOD score.""" + + if method_name == 'msp': + probs = jax.nn.softmax(logits, -1) + ood_score = jnp.max(probs, -1) + elif method_name == 'entropy': + probs = jax.nn.softmax(logits, -1) + entropy = -jnp.sum(probs * jnp.log(probs), axis=-1) + ood_score = entropy + elif method_name == 'mlogit': + ood_score = jnp.max(logits, -1) + elif method_name == 'sum_topklogit': + ood_score = jax.lax.top_k(logits, num_top_k)[0].sum(-1) + else: + raise NotImplementedError( + f'Missing method {method_name} to calculate OOD score.') + return ood_score + + + def get_ood_metrics( logits: jnp.ndarray, ood_mask: jnp.ndarray, From 6710faa56c507407c646fa21814702953b39fb7b Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 28 Sep 2022 22:48:08 -0400 Subject: [PATCH 083/150] add street hazards gp config --- .../configs/street_hazards/gp.py | 273 ++++++++++++++++++ .../robust_segvit/run_gp_street_hazards.yaml | 40 +++ 2 files changed, 313 insertions(+) create mode 100644 experimental/robust_segvit/configs/street_hazards/gp.py create mode 100755 experimental/robust_segvit/run_gp_street_hazards.yaml diff --git a/experimental/robust_segvit/configs/street_hazards/gp.py b/experimental/robust_segvit/configs/street_hazards/gp.py new file mode 100644 index 000000000..2e0c5cc32 --- /dev/null +++ b/experimental/robust_segvit/configs/street_hazards/gp.py @@ -0,0 +1,273 @@ +# coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Train segmenter model on street_hazards. + + +""" +# pylint: enable=line-too-long + +import ml_collections +import os +import datetime + +_CITYSCAPES_FINE_TRAIN_SIZE = 2975 +_CITYSCAPES_COARSE_TRAIN_SIZE = 19998 + +_ADE20K_TRAIN_SIZE = 20210 +_PASCAL_VOC_TRAIN_SIZE = 10582 +_PASCAL_CONTEXT_TRAIN_SIZE = 4998 +_STREET_HAZARDS_TRAIN_SIZE = 5125 + +TRAIN_SIZES = { + 'cityscapes': _CITYSCAPES_FINE_TRAIN_SIZE, + 'ade20k': _ADE20K_TRAIN_SIZE, + 'ade20k_ind': _ADE20K_TRAIN_SIZE, + 'pascal_voc': _PASCAL_VOC_TRAIN_SIZE, + 'pascal_context': _PASCAL_CONTEXT_TRAIN_SIZE, + 'street_hazards': _STREET_HAZARDS_TRAIN_SIZE + +} + +# Model specs. +LOAD_PRETRAINED_BACKBONE = True +BACKBONE_ORIGIN = 'vision_transformer' +VIT_SIZE = 'L' +STRIDE = 16 +RESNET_SIZE = None +CLASSIFIER = 'token' +target_size = (1280, 720) +UPSTREAM_TASK = 'augreg+i21k+imagenet2012' + + +# Upstream +MODEL_PATHS = { + # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 with augreg + ('vision_transformer', 'L', 16, None, 'token', 'i21k+imagenet2012'): + 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', + ('vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', +} + + +MODEL_PATH = MODEL_PATHS[(BACKBONE_ORIGIN, VIT_SIZE, STRIDE, RESNET_SIZE, + CLASSIFIER, UPSTREAM_TASK)] + +if VIT_SIZE == 'B': + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 +elif VIT_SIZE == 'L': + mlp_dim = 4096 + num_heads = 16 + num_layers = 24 + hidden_size = 1024 + +TRAIN_SAMPLES = 32 + + +def get_config(runlocal=''): + """Returns the configuration for ADE20k_ind segmentation.""" + + runlocal = bool(runlocal) + + config = ml_collections.ConfigDict() + config.experiment_name = 'street_hazards_gp_hyper' + + 
# Dataset. + config.dataset_name = 'robust_segvit_segmentation' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 + + config.dataset_configs.train_split = 'train' + config.dataset_configs.name = 'street_hazards' + config.dataset_configs.dataset_name = '' # ood name flag to write in eval. + + # Model. + config.model_name = 'segvit' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit' + config.model.backbone.mlp_dim = mlp_dim + config.model.backbone.num_heads = num_heads + config.model.backbone.num_layers = num_layers + config.model.backbone.hidden_size = hidden_size + config.model.backbone.dropout_rate = 0.1 + config.model.backbone.attention_dropout_rate = 0.0 + config.model.backbone.classifier = CLASSIFIER + + # Decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'gp' + + # GP layer params + config.model.decoder.gp_layer = ml_collections.ConfigDict() + config.model.decoder.gp_layer.covmat_kwargs = ml_collections.ConfigDict() + config.model.decoder.gp_layer.covmat_kwargs.ridge_penalty = 1. + # Disable momentum in order to use exact covariance update for finetuning. + # Disable to allow exact cov update. + config.model.decoder.gp_layer.covmat_kwargs.momentum = 0.99 + config.model.decoder.mean_field_factor = 1. + # Additional params + config.model.decoder.gp_layer.normalize_input = True + config.model.decoder.gp_layer.hidden_kwargs = ml_collections.ConfigDict() + config.model.decoder.gp_layer.hidden_kwargs.feature_scale = 1. + + # Training. 
+ config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = ml_collections.FieldReference(100) + config.batch_size = 32 + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # Learning rate. + config.num_train_examples = TRAIN_SIZES.get(config.dataset_configs.name) + config.steps_per_epoch = config.get_ref( + 'num_train_examples') // config.get_ref('batch_size') + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') + config.lr_configs.steps_per_cycle = config.get_ref( + 'num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = ml_collections.ConfigDict() + config.pretrained_backbone_configs.checkpoint_format = BACKBONE_ORIGIN + config.pretrained_backbone_configs.checkpoint_path = MODEL_PATH + config.pretrained_backbone_configs.token_init = True + config.pretrained_backbone_configs.classifier = 'token' + config.pretrained_backbone_configs.backbone_type = 'vit' + + # Logging. + config.write_summary = True + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof. + config.checkpoint = True # Do checkpointing. + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = False # Debug mode during training. 
+ config.debug_eval = False # Debug mode during eval. + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # Evaluation. + config.eval_configs = ml_collections.ConfigDict() + config.eval_configs.mode = 'standard' + config.eval_mode = False + config.eval_covariate_shift = True + config.eval_label_shift = True + config.model.input_shape = target_size + + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'nmlogit' + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. + + if runlocal: + config.count_flops = False + config.dataset_configs.train_target_size = (128, 128) + config.model.input_shape = config.dataset_configs.train_target_size + config.batch_size = 8 + config.num_training_epochs = 5 + config.warmup_steps = 0 + config.dataset_configs.train_split = f'train[:{TRAIN_SAMPLES}]' + config.dataset_configs.validation_split = f'validation[:{TRAIN_SAMPLES}]' + config.num_train_examples = TRAIN_SAMPLES + + return config + + +def checkpoint(hyper, backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task): + """Defines checkpoints for sweep.""" + overwrites = [] + if resnet_size is not None: + raise NotImplementedError('') + else: + overwrites.append( + hyper.sweep('config.model.patches', [{ + 'size': (stride, stride) + }])) + + if vit_size == 'B': + overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [3072])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [12])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [12])) 
+ overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [768])) + elif vit_size == 'L': + overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [4096])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [16])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [24])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [1024])) + else: + raise NotImplementedError('') + + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_format', + [backbone_origin])) + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_path', [ + MODEL_PATHS[(backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task)] + ])) + + return hyper.product(overwrites) + + +def get_sweep(hyper): + """Defines the hyper-parameters sweeps for doing grid search.""" + + parameters = [ + hyper.sweep('config.model.decoder.gp_layer.normalize_input', + [True, False]), + hyper.sweep('config.model.decoder.mean_field_factor', + hyper.discrete(range(1, 10))), + hyper.sweep('config.model.decoder.gp_layer.hidden_kwargs.feature_scale', + [1.0, 2.0]), + ] + + return hyper.product(parameters) \ No newline at end of file diff --git a/experimental/robust_segvit/run_gp_street_hazards.yaml b/experimental/robust_segvit/run_gp_street_hazards.yaml new file mode 100755 index 000000000..e40da9e69 --- /dev/null +++ b/experimental/robust_segvit/run_gp_street_hazards.yaml @@ -0,0 +1,40 @@ +name: gp_street_hazards_hparam +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 16 + config.model.decoder.gp_layer.normalize_input: + values: [0, 1] + config.model.decoder.mean_field_factor: + values: [1, 2, 5, 6, 10] + 
config.model.decoder.gp_layer.hidden_kwargs.feature_scale: + values: [1.0, 2.0] + + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/street_hazards/deterministic.py" + - "--output_dir" + - "gs://ub-ekb/segmenter/street_hazards/gp" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file From fd93a84e91e872b323f6aebe7ce36f022fd052b0 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 28 Sep 2022 22:52:03 -0400 Subject: [PATCH 084/150] add class to compute metrics offline --- .../robust_segvit/metrics_multihost.py | 142 +++++++++++++ .../robust_segvit/metrics_multihost_test.py | 199 ++++++++++++++++++ 2 files changed, 341 insertions(+) create mode 100644 experimental/robust_segvit/metrics_multihost.py create mode 100644 experimental/robust_segvit/metrics_multihost_test.py diff --git a/experimental/robust_segvit/metrics_multihost.py b/experimental/robust_segvit/metrics_multihost.py new file mode 100644 index 000000000..95b884961 --- /dev/null +++ b/experimental/robust_segvit/metrics_multihost.py @@ -0,0 +1,142 @@ +"""Calculate ood metrics across hosts.""" +from typing import Any, Optional, Dict + +import jax +import jax.numpy as jnp +import tensorflow as tf +from jax.experimental import multihost_utils +from ood_metrics import get_ood_score +from ood_metrics import get_score + + +def keras_auc_to_arrays(keras_auc_object): + """Pull out arrays from keras roc object.""" + # The thresholds used are determinisitc, so we need not store them. 
+ tp = jnp.asarray(keras_auc_object.true_positives) + fp = jnp.asarray(keras_auc_object.false_positives) + tn = jnp.asarray(keras_auc_object.true_negatives) + fn = jnp.asarray(keras_auc_object.false_negatives) + return tp, fp, tn, fn + + +def arrays_to_keras_auc(tp, fp, tn, fn, keras_auc_object): + """Assign confusion matrix arrays to a keras_auc_object.""" + keras_auc_object.true_positives.assign(tp) + keras_auc_object.false_positives.assign(fp) + keras_auc_object.true_negatives.assign(tn) + keras_auc_object.false_negatives.assign(fn) + return keras_auc_object + + +def combine_states(all_auc_states, num_thresholds=200): + # jax can take in trees of arrays, tuple is considered a tree so we can + # unpack it here. + # each array here has dimensions #host x shape + + all_tp, all_fp, all_tn, all_fn = all_auc_states + + assert all_tp.shape == (jax.process_count(), num_thresholds) + assert all_fp.shape == (jax.process_count(), num_thresholds) + assert all_tn.shape == (jax.process_count(), num_thresholds) + assert all_fn.shape == (jax.process_count(), num_thresholds) + + tp = jnp.sum(all_tp, 0) + fp = jnp.sum(all_fp, 0) + tn = jnp.sum(all_tn, 0) + fn = jnp.sum(all_fn, 0) + + return tp, fp, tn, fn + + +class ComputeAUCMetric: + """Calculate auc metrics across multiple hosts.""" + def __init__(self, curve, num_thresholds=200, from_logits=False): + self.curve = curve + self.num_thresholds = num_thresholds + self.from_logits = from_logits + self.auc = tf.keras.metrics.AUC(curve=self.curve, + from_logits=self.from_logits, + num_thresholds=self.num_thresholds) + + def calculate_and_update_scores(self, logits, label, sample_weight): + self.auc.update_state(label, logits, sample_weight=sample_weight) + + def gather_metrics(self): + auc_state = keras_auc_to_arrays(self.auc) + + # Gather the data across all hosts. + all_auc_states = multihost_utils.process_allgather(auc_state) + + # Below we pick the first device. 
+ self.auc = arrays_to_keras_auc(*combine_states(all_auc_states, + num_thresholds=self.num_thresholds), + self.auc) + + return self.auc.result().numpy() + + +class ComputeOODAUCMetric: + """Calculate auc metrics across multiple hosts.""" + def __init__(self, curve, num_thresholds=200): + self.curve = curve + self.num_thresholds = num_thresholds + self.from_logits = False + self.auc = tf.keras.metrics.AUC(curve=self.curve, + from_logits=self.from_logits, + num_thresholds=self.num_thresholds) + + def calculate_and_update_scores(self, logits, label, sample_weight, *kwargs): + ood_score = get_ood_score(logits, *kwargs) + self.auc.update_state(label, ood_score, sample_weight=sample_weight) + + def gather_metrics(self): + auc_state = keras_auc_to_arrays(self.auc) + + # Gather the data across all hosts. + all_auc_states = multihost_utils.process_allgather(auc_state) + + # Below we pick the first device. + self.auc = arrays_to_keras_auc(*combine_states(all_auc_states, + num_thresholds=self.num_thresholds), + self.auc) + + return self.auc.result().numpy() + + +class ComputeScoreAUCMetric: + """Calculate score based auc metrics across multiple hosts.""" + def __init__(self, curve, num_thresholds=200, summation_method='interpolation',thresholds=None): + self.curve = curve + self.num_thresholds = num_thresholds + self.from_logits = False + self.summation_method = summation_method + self.thresholds = thresholds + self.auc = tf.keras.metrics.AUC(curve=self.curve, + from_logits=self.from_logits, + num_thresholds=self.num_thresholds, + summation_method=self.summation_method, + thresholds=self.thresholds) + + def calculate_and_update_scores(self, logits, label, sample_weight, **kwargs): + " label 1 for ood pixel and 0 is otherwise " + conf = - 1 * get_score(logits=logits, **kwargs) + + # skip images where all the pixels are ood or there are no ood pixels + all_pixel_ood = jnp.sum(label*sample_weight) == 1 + no_pixel_ood = jnp.sum(label*sample_weight) == 0 + + if not(all_pixel_ood) 
and not(no_pixel_ood): + self.auc.update_state(label, conf, sample_weight=sample_weight) + + def gather_metrics(self): + auc_state = keras_auc_to_arrays(self.auc) + + # Gather the data across all hosts. + all_auc_states = multihost_utils.process_allgather(auc_state) + + # Below we pick the first device. + self.auc = arrays_to_keras_auc(*combine_states(all_auc_states, + num_thresholds=self.num_thresholds), + self.auc) + + return self.auc.result().numpy() diff --git a/experimental/robust_segvit/metrics_multihost_test.py b/experimental/robust_segvit/metrics_multihost_test.py new file mode 100644 index 000000000..6efddf7af --- /dev/null +++ b/experimental/robust_segvit/metrics_multihost_test.py @@ -0,0 +1,199 @@ +from absl.testing import absltest +from absl.testing import parameterized + +import jax +import jax.numpy as jnp +import numpy as np +from flax import jax_utils + +from metrics_multihost import ComputeAUCMetric +from metrics_multihost import ComputeOODAUCMetric +from metrics_multihost import ComputeScoreAUCMetric +import sklearn.metrics + +from ood_metrics import get_ood_score + +class OODMetricsMultiHostTest(parameterized.TestCase): + + def setUp(self): + super(OODMetricsMultiHostTest, self).setUp() + + @parameterized.parameters([(0, 0.0), (1, 0.01), (2, 0.5), (3, 0.99), (4, 1)]) + def test_ComputeAUCMetric(self, seed, masked_fraction): + """Test computation of AUC metric.""" + np.random.seed(seed) + + from_logits = False # when set to True applies sigmoid to logits. + num_thresholds = 100 + + # Create test data: + num_classes = 2 + input_shape = [8, 1, 224, 224] + logits_shape = input_shape + [num_classes] + logits_np = np.random.rand(*logits_shape) + + # Note: We include label -1, which indicates excluded pixels: + label = np.random.randint(0, num_classes, size=input_shape) + label[:4] = np.argmax(logits_np[:4], axis=-1) # Set half to correct. 
+ + batch_np = { + 'logits': logits_np, + 'label': + label, + 'batch_mask': + (np.random.rand(*input_shape) > masked_fraction) & (label != -1), + } + batch = { + 'logits': jnp.array(logits_np), + 'label': jnp.array(batch_np['label']), + 'batch_mask': jnp.array(batch_np['batch_mask']), + } + + fake_batches_replicated = jax_utils.replicate([batch]) + + auc_roc = ComputeAUCMetric(curve='ROC', num_thresholds=num_thresholds, from_logits=from_logits) + + for fake_batch in fake_batches_replicated: + if from_logits: + pred = jnp.max(fake_batch['logits'], axis=-1) + else: + pred = jnp.argmax(fake_batch['logits'], axis=-1) + auc_roc.calculate_and_update_scores(logits=pred, + label=fake_batch['label'], + sample_weight=fake_batch['batch_mask'], + ) + + auc_result = auc_roc.gather_metrics() + + # Numpy result: + if np.all(batch_np['batch_mask'] == 0): + auc_numpy = 0 + else: + labels_negative_ignored = np.maximum(batch_np['label'], 0) + y_pred = np.argmax(logits_np, axis=-1) + auc_numpy = sklearn.metrics.roc_auc_score(labels_negative_ignored.ravel(), + y_pred.ravel(), + sample_weight=batch_np['batch_mask'].ravel()) + + self.assertAlmostEqual(auc_result, auc_numpy, places=2) + + + @parameterized.parameters([(0, 0.0), (1, 0.01), (2, 0.5), (3, 0.99), (4, 1)]) + def test_ComputeOODAUCMetric(self, seed, masked_fraction): + """Test computation of OOD scored AUC metric.""" + np.random.seed(seed) + num_thresholds = 1000 + + ood_kwargs = {} + # Create test data: + num_classes = 2 + input_shape = [8, 1, 224, 224] + logits_shape = input_shape + [num_classes] + logits_np = np.random.rand(*logits_shape) + + # Note: We include label -1, which indicates excluded pixels: + label = np.random.randint(0, num_classes, size=input_shape) + label[:4] = np.argmax(logits_np[:4], axis=-1) # Set half to correct. 
+ + batch_np = { + 'logits': logits_np, + 'label': + label, + 'batch_mask': + (np.random.rand(*input_shape) > masked_fraction) & (label != -1), + } + batch = { + 'logits': jnp.array(logits_np), + 'label': jnp.array(batch_np['label']), + 'batch_mask': jnp.array(batch_np['batch_mask']), + } + + fake_batches_replicated = jax_utils.replicate([batch]) + + auc_roc = ComputeOODAUCMetric(curve='ROC', num_thresholds=num_thresholds) + + for fake_batch in fake_batches_replicated: + pred = fake_batch['logits'] + ood_label = 1 - fake_batch['label'] + + auc_roc.calculate_and_update_scores(logits=pred, + label=ood_label, + sample_weight=fake_batch['batch_mask'], + *ood_kwargs, + ) + auc_result = auc_roc.gather_metrics() + + # Numpy result: + if np.all(batch_np['batch_mask'] == 0): + auc_numpy = 0 + else: + labels_negative_ignored = np.maximum(batch_np['label'], 0) + ood_label_np = 1 - labels_negative_ignored + ood_score = get_ood_score(logits_np, **ood_kwargs) + auc_numpy = sklearn.metrics.roc_auc_score(ood_label_np.ravel(), + ood_score.ravel(), + sample_weight=batch_np['batch_mask'].ravel()) + + self.assertAlmostEqual(auc_result, auc_numpy, places=1) + + @parameterized.parameters([(0, 0.0), (1, 0.01), (2, 0.5), (3, 0.99), (4, 1)]) + def test_ComputeScoreAUCMetric(self, seed, masked_fraction): + """Test computation of scored AUC metric.""" + np.random.seed(seed) + num_thresholds = 10000 + summation_method = 'interpolation' + ood_kwargs = {'method_name': 'msp'} + # Create test data: + num_classes = 2 + input_shape = [8, 1, 224, 224] + logits_shape = input_shape + [num_classes] + logits_np = np.random.rand(*logits_shape) + # Note: We include label -1, which indicates excluded pixels: + label = np.random.randint(0, num_classes, size=input_shape) + label[:4] = np.argmax(logits_np[:4], axis=-1) # Set half to correct. 
+ + batch_np = { + 'logits': logits_np, + 'label': + label, + 'batch_mask': + (np.random.rand(*input_shape) > masked_fraction) & (label != -1), + } + batch = { + 'logits': jnp.array(logits_np), + 'label': jnp.array(batch_np['label']), + 'batch_mask': jnp.array(batch_np['batch_mask']), + } + + + fake_batches_replicated = jax_utils.replicate([batch]) + + auc_roc = ComputeScoreAUCMetric(curve='ROC', num_thresholds=num_thresholds, + summation_method=summation_method) + + for fake_batch in fake_batches_replicated: + pred = fake_batch['logits'] + ood_label = 1 - fake_batch['label'] + + auc_roc.calculate_and_update_scores(logits=pred, + label=ood_label, + sample_weight=fake_batch['batch_mask'], + **ood_kwargs, + ) + auc_result = auc_roc.gather_metrics() + + # Numpy result: + if np.all(batch_np['batch_mask'] == 0): + auc_numpy = 0 + else: + labels_negative_ignored = np.maximum(batch_np['label'], 0) + ood_label_np = 1 - labels_negative_ignored + ood_score = get_ood_score(logits_np, **ood_kwargs) + auc_numpy = sklearn.metrics.roc_auc_score(ood_label_np.ravel(), + ood_score.ravel(), + sample_weight=batch_np['batch_mask'].ravel()) + + self.assertAlmostEqual(auc_result, auc_numpy, places=1) + +if __name__ == '__main__': + absltest.main() From 89e4f884f227bfcf27c1ac987dba97b5bfa00fcd Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 28 Sep 2022 22:59:56 -0400 Subject: [PATCH 085/150] update gp hparam search to only include mean_field_factor --- experimental/robust_segvit/run_gp_street_hazards.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/experimental/robust_segvit/run_gp_street_hazards.yaml b/experimental/robust_segvit/run_gp_street_hazards.yaml index e40da9e69..e1c21083a 100755 --- a/experimental/robust_segvit/run_gp_street_hazards.yaml +++ b/experimental/robust_segvit/run_gp_street_hazards.yaml @@ -16,13 +16,8 @@ parameters: value: ${{entity}} config.batch_size: value: 16 - config.model.decoder.gp_layer.normalize_input: - values: [0, 1] 
config.model.decoder.mean_field_factor: values: [1, 2, 5, 6, 10] - config.model.decoder.gp_layer.hidden_kwargs.feature_scale: - values: [1.0, 2.0] - command: From 9589a11d9633926045a24147fb92dc9ad583794b Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 28 Sep 2022 23:13:49 -0400 Subject: [PATCH 086/150] fix bug where the config called was deterministic --- experimental/robust_segvit/run_gp_street_hazards.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/robust_segvit/run_gp_street_hazards.yaml b/experimental/robust_segvit/run_gp_street_hazards.yaml index e1c21083a..417303f0b 100755 --- a/experimental/robust_segvit/run_gp_street_hazards.yaml +++ b/experimental/robust_segvit/run_gp_street_hazards.yaml @@ -25,7 +25,7 @@ command: - python - ${program} - "--config" - - "configs/street_hazards/deterministic.py" + - "configs/street_hazards/gp.py" - "--output_dir" - "gs://ub-ekb/segmenter/street_hazards/gp" - "--num_cores" From f6c2acaf73982f304daf432676ce45fdf880489f Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 28 Sep 2022 23:29:02 -0400 Subject: [PATCH 087/150] update batch_size to fit in memory --- experimental/robust_segvit/run_gp_street_hazards.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/robust_segvit/run_gp_street_hazards.yaml b/experimental/robust_segvit/run_gp_street_hazards.yaml index 417303f0b..f29a1dbe9 100755 --- a/experimental/robust_segvit/run_gp_street_hazards.yaml +++ b/experimental/robust_segvit/run_gp_street_hazards.yaml @@ -15,7 +15,7 @@ parameters: config.wandb_entity : value: ${{entity}} config.batch_size: - value: 16 + value: 8 config.model.decoder.mean_field_factor: values: [1, 2, 5, 6, 10] From 128eaa2e105ec9e661ee4f6e67f82d6bcbdc23da Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Thu, 29 Sep 2022 00:15:05 -0400 Subject: [PATCH 088/150] update img dimension used for experiment --- .../robust_segvit/configs/street_hazards/deterministic.py | 2 +- 
experimental/robust_segvit/configs/street_hazards/gp.py | 2 +- experimental/robust_segvit/run_gp_street_hazards.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/experimental/robust_segvit/configs/street_hazards/deterministic.py b/experimental/robust_segvit/configs/street_hazards/deterministic.py index bd4a5a349..ce22f02c5 100644 --- a/experimental/robust_segvit/configs/street_hazards/deterministic.py +++ b/experimental/robust_segvit/configs/street_hazards/deterministic.py @@ -50,7 +50,7 @@ STRIDE = 16 RESNET_SIZE = None CLASSIFIER = 'token' -target_size = (1280, 720) +target_size = (720, 720) UPSTREAM_TASK = 'augreg+i21k+imagenet2012' diff --git a/experimental/robust_segvit/configs/street_hazards/gp.py b/experimental/robust_segvit/configs/street_hazards/gp.py index 2e0c5cc32..9daccba96 100644 --- a/experimental/robust_segvit/configs/street_hazards/gp.py +++ b/experimental/robust_segvit/configs/street_hazards/gp.py @@ -49,7 +49,7 @@ STRIDE = 16 RESNET_SIZE = None CLASSIFIER = 'token' -target_size = (1280, 720) +target_size = (720, 720) UPSTREAM_TASK = 'augreg+i21k+imagenet2012' diff --git a/experimental/robust_segvit/run_gp_street_hazards.yaml b/experimental/robust_segvit/run_gp_street_hazards.yaml index f29a1dbe9..417303f0b 100755 --- a/experimental/robust_segvit/run_gp_street_hazards.yaml +++ b/experimental/robust_segvit/run_gp_street_hazards.yaml @@ -15,7 +15,7 @@ parameters: config.wandb_entity : value: ${{entity}} config.batch_size: - value: 8 + value: 16 config.model.decoder.mean_field_factor: values: [1, 2, 5, 6, 10] From d0e9fed044fdfc66841d4bccd37477a22f08e6af Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Thu, 29 Sep 2022 01:01:12 -0400 Subject: [PATCH 089/150] remove old cityscapes dataset --- experimental/cityscapes/README.md | 84 --- experimental/cityscapes/call_deterministic.sh | 25 - experimental/cityscapes/call_tensorboard.sh | 13 - experimental/cityscapes/custom_models.py | 268 --------- 
.../cityscapes/custom_segmentation_eval.py | 464 ---------------- .../cityscapes/custom_segmentation_trainer.py | 524 ------------------ experimental/cityscapes/deterministic.py | 187 ------- experimental/cityscapes/deterministic_eval.py | 161 ------ .../cityscapes/deterministic_eval_gcp.py | 174 ------ .../cityscapes/deterministic_eval_l32.py | 170 ------ experimental/cityscapes/ensemble_eval.py | 254 --------- .../experiments/cityscapes_segvit_config.py | 227 -------- .../imagenet21k_segmenter_cityscapes.py | 153 ----- .../imagenet21k_segmenter_cityscapes1.py | 177 ------ .../imagenet21k_segmenter_cityscapes3.py | 137 ----- .../imagenet21k_segmenter_cityscapes512.py | 153 ----- .../imagenet21k_segmenter_cityscapes_sngp.py | 153 ----- ...magenet21k_segmenter_cityscapes_sngp512.py | 153 ----- .../pod/imagenet21k_segmenter_cityscapes.py | 153 ----- .../imagenet21k_segmenter_cityscapes_sngp.py | 153 ----- .../experiments/pod/segmenter_cityscapes.py | 153 ----- .../experiments/segmenter_cityscapes.py | 153 ----- .../experiments/segmenter_cityscapes512.py | 153 ----- ...k_segmenter_cityscapes_deterministic_10.py | 176 ------ ..._segmenter_cityscapes_deterministic_100.py | 173 ------ ...k_segmenter_cityscapes_deterministic_25.py | 173 ------ ...k_segmenter_cityscapes_deterministic_50.py | 173 ------ ...k_segmenter_cityscapes_deterministic_75.py | 173 ------ .../imagenet21k_segmenter_cityscapes_gp_10.py | 173 ------ ...imagenet21k_segmenter_cityscapes_gp_100.py | 173 ------ .../imagenet21k_segmenter_cityscapes_gp_25.py | 173 ------ .../imagenet21k_segmenter_cityscapes_gp_50.py | 173 ------ .../imagenet21k_segmenter_cityscapes_gp_75.py | 173 ------ ...enet21k_segmenter_cityscapes_scratch_10.py | 173 ------ ...net21k_segmenter_cityscapes_scratch_100.py | 173 ------ ...enet21k_segmenter_cityscapes_scratch_25.py | 173 ------ ...enet21k_segmenter_cityscapes_scratch_50.py | 173 ------ ...enet21k_segmenter_cityscapes_scratch_75.py | 173 ------ 
...t21k_segmenter_cityscapes_deterministic.py | 166 ------ ...1k_segmenter_cityscapes_deterministic.yaml | 43 -- .../experiments/toy/segmenter_cityscapes.py | 161 ------ experimental/cityscapes/pretrainer_utils.py | 46 -- .../cityscapes/run_deterministic_eval.sh | 46 -- .../cityscapes/run_deterministic_eval_l32.sh | 46 -- .../run_deterministic_eval_splits_vm.sh | 42 -- .../cityscapes/run_deterministic_splits_vm.sh | 34 -- experimental/cityscapes/run_ensemble.sh | 75 --- experimental/cityscapes/run_ensemble2.sh | 76 --- experimental/cityscapes/run_ensemble_debug.sh | 85 --- experimental/cityscapes/run_ensemble_eval.sh | 61 -- experimental/cityscapes/run_l32_splits_vm.sh | 63 --- experimental/cityscapes/run_pretrained.sh | 34 -- experimental/cityscapes/run_pretrained_vm.sh | 23 - .../cityscapes/run_pretrained_vm512.sh | 23 - .../cityscapes/run_pretrained_vm_larger.sh | 23 - .../cityscapes/uncertainty_metrics.py | 205 ------- 56 files changed, 8092 deletions(-) delete mode 100644 experimental/cityscapes/README.md delete mode 100755 experimental/cityscapes/call_deterministic.sh delete mode 100755 experimental/cityscapes/call_tensorboard.sh delete mode 100644 experimental/cityscapes/custom_models.py delete mode 100644 experimental/cityscapes/custom_segmentation_eval.py delete mode 100644 experimental/cityscapes/custom_segmentation_trainer.py delete mode 100644 experimental/cityscapes/deterministic.py delete mode 100644 experimental/cityscapes/deterministic_eval.py delete mode 100644 experimental/cityscapes/deterministic_eval_gcp.py delete mode 100644 experimental/cityscapes/deterministic_eval_l32.py delete mode 100644 experimental/cityscapes/ensemble_eval.py delete mode 100644 experimental/cityscapes/experiments/cityscapes_segvit_config.py delete mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py delete mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py delete mode 100644 
experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py delete mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes512.py delete mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp.py delete mode 100644 experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp512.py delete mode 100644 experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes.py delete mode 100644 experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes_sngp.py delete mode 100644 experimental/cityscapes/experiments/pod/segmenter_cityscapes.py delete mode 100644 experimental/cityscapes/experiments/segmenter_cityscapes.py delete mode 100644 experimental/cityscapes/experiments/segmenter_cityscapes512.py delete mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py delete mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_100.py delete mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_25.py delete mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_50.py delete mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_75.py delete mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_10.py delete mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_100.py delete mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_25.py delete mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_50.py delete mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_75.py delete mode 100644 
experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_10.py delete mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_100.py delete mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_25.py delete mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_50.py delete mode 100644 experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_75.py delete mode 100644 experimental/cityscapes/experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.py delete mode 100644 experimental/cityscapes/experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.yaml delete mode 100644 experimental/cityscapes/experiments/toy/segmenter_cityscapes.py delete mode 100644 experimental/cityscapes/pretrainer_utils.py delete mode 100755 experimental/cityscapes/run_deterministic_eval.sh delete mode 100755 experimental/cityscapes/run_deterministic_eval_l32.sh delete mode 100755 experimental/cityscapes/run_deterministic_eval_splits_vm.sh delete mode 100755 experimental/cityscapes/run_deterministic_splits_vm.sh delete mode 100755 experimental/cityscapes/run_ensemble.sh delete mode 100755 experimental/cityscapes/run_ensemble2.sh delete mode 100755 experimental/cityscapes/run_ensemble_debug.sh delete mode 100755 experimental/cityscapes/run_ensemble_eval.sh delete mode 100755 experimental/cityscapes/run_l32_splits_vm.sh delete mode 100755 experimental/cityscapes/run_pretrained.sh delete mode 100755 experimental/cityscapes/run_pretrained_vm.sh delete mode 100755 experimental/cityscapes/run_pretrained_vm512.sh delete mode 100755 experimental/cityscapes/run_pretrained_vm_larger.sh delete mode 100644 experimental/cityscapes/uncertainty_metrics.py diff --git a/experimental/cityscapes/README.md b/experimental/cityscapes/README.md deleted file mode 100644 index 35b8cadc4..000000000 --- 
a/experimental/cityscapes/README.md +++ /dev/null @@ -1,84 +0,0 @@ - -# Log - -[x] add cityscapes dataset loader.
-[x] add deterministic training for segmenter.
-[x] include transfer learning option: init from pretrained backbone.
-[x] include option to train vit+ model using different train split.
-[x] add pavpu metric.
-[x] calculate uncertainty metrics.
- -[x] add run with vit l-32 backbone: run_l32_splits_vm.sh
-[x] add eval for vit l-32 models: run_deterministic_eval_l32.sh
- -## [Wandb integration ](https://docs.wandb.ai/guides/sweeps/configuration)
-[x] Visualize results in wandb: run_ensemble.sh
-[x] Hyperparameter sweep: experiments/toy/toy_sweep
- -``` -wandb sweep experiments/toy/toy_sweep.yaml -wandb agent ${SWEEPID} -``` - -## Experiments - -Fully implemented:
- -[x] Vanilla deterministic upstream + deterministic downstream.
-Given a deterministic model trained on imagenet21k, -replace the decoder by a segmentation decoder and finetune the model on cityscapes. -``` -wandb sweep experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.yaml -wandb agent -``` -Once the models have trained independently, we can evaluate the results by running:
-``` -./run_deterministic_eval_l32.sh -``` - -Missing wandb configuration:
- -[x] Ensemble (ensemble upstream + ensemble downstream).
-Given E deterministic models trained on imagenet21k, -replace the E decoders in each model by E new segmentations encoders -(This step is achieved by calling get_pretrained_backbone_path) -Finetune each model separately on cityscapes -Then, aggregate the results. - -``` -./run_ensemble2.sh -``` -Once the models have trained independently, we can aggregate the results by running:
-``` -./run_ensemble_eval.sh -``` -[] Ensemble (ensemble upstream + deterministic downstream).
-Given E deterministic models trained on imagenet21k, -replace the E decoders in each model by 1 new segmentations encoder. -Finetune the new model on cityscapes. -``` - -``` -[] Batch Ensemble (batch ensemble upstream + deterministic downstream).
-Given a BE deterministic model trained on imagenet21k, -replace the MLP blocks in the encoder by -replace the E decoders in each model by rank-1 decoder which to get outputs [N, E, K]. -Finetune the new model on cityscapes. -Average over E to get the results. -``` - -``` - -To compare parameter between vit and vit_be model run: -``` -python -m unittest -v uncertainty_baselines/models/vit_batchensemble_test.py - -``` - -To debug segmenter BE model run: -``` -python -m unittest -v uncertainty_baselines/models/segmenter_be_test.py - -``` - - diff --git a/experimental/cityscapes/call_deterministic.sh b/experimental/cityscapes/call_deterministic.sh deleted file mode 100755 index ec73bb6bd..000000000 --- a/experimental/cityscapes/call_deterministic.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -echo -if [ "$(uname)" == "Darwin" ]; then - echo "Debug On mac" - # Do something under Mac OS X platform - output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs" - config_file="experiments/toy/segmenter_cityscapes.py" - num_cores=0 - tpu='None' - use_gpu=False - rng_seed=2 - python3 deterministic.py --output_dir=${output_dir} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - --config=${config_file} \ - --config.rng_seed=${rng_seed} \ - - -elif [ "$(uname)" == "Linux" ]; then - echo "run run_pretrained_vm.sh instead" -fi - -#python deterministic.py "--output_dir=$output_dir --num_cores=$num_cores --use_gpu=$use_gpu --tpu=$tpu --config=$config" - diff --git a/experimental/cityscapes/call_tensorboard.sh b/experimental/cityscapes/call_tensorboard.sh deleted file mode 100755 index b9c57d647..000000000 --- a/experimental/cityscapes/call_tensorboard.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/sh - -# map gsbucket to local -# ~/go/bin/gcsfuse --only-dir segmenter/cityscapes/run0 ub-ekb run0 - -# read local directory: -results_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/gsbucket_out/run0" -for d in results_dir ; do - echo "$d" -done 
- -tensorboard --logdir ${results_dir} --reload_multifile True - diff --git a/experimental/cityscapes/custom_models.py b/experimental/cityscapes/custom_models.py deleted file mode 100644 index 70dcc01a0..000000000 --- a/experimental/cityscapes/custom_models.py +++ /dev/null @@ -1,268 +0,0 @@ -""" -Custom models which allow for model inheritance -""" -import re -from typing import Any, Mapping, Optional, Tuple, List, Union - -import flax -import ml_collections -import numpy as np -import scipy -from absl import logging - -from scenic.model_lib.base_models.segmentation_model import SegmentationModel, \ - semantic_segmentation_metrics_function, _SEMANTIC_SEGMENTATION_METRICS, num_pixels -from scenic.train_lib import train_utils -from uncertainty_baselines.models.segmenter import SegVit -from immutabledict import immutabledict - - -from scenic.model_lib.base_models import base_model -from scenic.model_lib.base_models import model_utils -# JAX team is working on type annotation for pytree: -# https://github.com/google/jax/issues/1555 -PyTree = Union[Mapping[str, Mapping], Any] -import functools - -from uncertainty_metrics import calculate_pavpu, calculate_puncert_inacc, \ - calculate_pacc_cert - -# Standard default metrics for the semantic segmentation models. 
-_SEMANTIC_SEGMENTATION_METRICS_UNC = immutabledict({ - 'accuracy': (model_utils.weighted_correctly_classified, num_pixels), - - # The loss is already normalized, so we set num_pixels to 1.0: - 'loss': (model_utils.weighted_softmax_cross_entropy, lambda *a, **kw: 1.0), - - # The pavpu is already normalized, so we set num_pixels to 1.0: - 'pavpu': (calculate_pavpu, lambda *a, **kw: 1.0), - - # The pavpu is already normalized, so we set num_pixels to 1.0: - 'puncert_inacc': (calculate_puncert_inacc, lambda *a, **kw: 1.0), - - # The pavpu is already normalized, so we set num_pixels to 1.0: - 'pacc_cert': (calculate_pacc_cert, lambda *a, **kw: 1.0), - -}) - - - -class SegmenterSegmentationModel(SegmentationModel): - """Segmenter model for segmentation task.""" - - def build_flax_model(self): - return SegVit( - num_classes=self.dataset_meta_data['num_classes'], - patches=self.config.patches, - backbone_configs=self.config.backbone_configs, - decoder_configs=self.config.decoder_configs) - - def default_flax_model_config(self): - raise NotImplementedError() - - def init_backbone_from_train_state( - self, - train_state: train_utils.TrainState, - restored_train_state: Mapping[str, Any], - restored_model_cfg: ml_collections.ConfigDict, - ckpt_prefix_path: Optional[List[str]] = None, - model_prefix_path: Optional[List[str]] = None, - name_mapping: Optional[Mapping[str, str]] = None, - skip_regex: Optional[str] = None) -> train_utils.TrainState: - """Updates the train_state with data from pretrain_state. - - Args: - train_state: A raw TrainState for the model. - restored_train_state: A TrainState that is loaded with parameters/state of - a pretrained model. - restored_model_cfg: Configuration of the model from which the - restored_train_state come from. Usually used for some asserts. - ckpt_prefix_path: Prefix to restored model parameters. - model_prefix_path: Prefix to the parameters to replace in the subtree model. 
- name_mapping: Mapping from parameter names of checkpoint to this model. - skip_regex: If there is a parameter whose parent keys match the regex, - the parameter will not be replaced from pretrain_state. - - Returns: - Updated train_state. - """ - # --------------------------------- - # Get grid sizes of target model: - gs_segvit = [ - self.config.dataset_configs.target_size[0] // - self.config.patches.size[0], - self.config.dataset_configs.target_size[1] // - self.config.patches.size[1] - ] - # Find size of positional embeddings (grid size) if given as input - # otherwise we will take the will use the model checkpoint to estimate thiis - if ('patches' in restored_model_cfg) or ('stem_configs' in restored_model_cfg): - if ('patches' in restored_model_cfg): - restored_patches_cfg = restored_model_cfg.patches - else: - restored_patches_cfg = restored_model_cfg.stem_configs.patches - gs_vit = restored_patches_cfg.grid - else: - gs_vit = None - - # init_dset_meta = self.config.model.backbone.init_from.dataset_meta_data - # gs_vit = [ - # init_dset_meta['input_shape'][1] // restored_patches_cfg.size[0], - # init_dset_meta['input_shape'][2] // restored_patches_cfg.size[1], - # ] - - # --------------------------------- - name_mapping = name_mapping or {} - - # converts pre-linen which doesn't apply here - # (restored_params, - # restored_model_state) = get_params_and_model_state_dict(restored_train_state) - restored_params = restored_train_state['optimizer']['target'] - restored_model_state = restored_train_state.get('model_state') - - model_params = train_state.optimizer.target - model_params = _replace_dict(model_params, - restored_params, - restored_model_cfg, - gs_vit, - gs_segvit, - ckpt_prefix_path, - model_prefix_path, - name_mapping, - skip_regex) - new_optimizer = train_state.optimizer.replace( - target=model_params) - train_state = train_state.replace( # pytype: disable=attribute-error - optimizer=new_optimizer) - if (restored_model_state is not None and - 
train_state.model_state is not None and - train_state.model_state): - if model_prefix_path: - # Insert model prefix after 'batch_stats'. - model_prefix_path = ['batch_stats'] + model_prefix_path - if 'batch_stats' in restored_model_state: - ckpt_prefix_path = ckpt_prefix_path or [] - ckpt_prefix_path = ['batch_stats'] + ckpt_prefix_path - elif 'batch_stats' not in restored_model_state: # Backward compatibility. - model_prefix_path = ['batch_stats'] - if ckpt_prefix_path and ckpt_prefix_path[0] != 'batch_stats': - ckpt_prefix_path = ['batch_stats'] + ckpt_prefix_path - model_state = _replace_dict(train_state.model_state, - restored_model_state, - restored_model_cfg, - gs_vit, - gs_segvit, - ckpt_prefix_path, - model_prefix_path, - name_mapping, - skip_regex) - train_state = train_state.replace( # pytype: disable=attribute-error - model_state=model_state) - return train_state - - def get_metrics_fn_unc(self, split: Optional[str] = None) -> base_model.MetricFn: - """Returns a callable metric function for the model. - - Edited from get_metrics_fn to support additional uncertainty metrics. - - Args: - split: The split for which we calculate the metrics. It should be one of - the ['train', 'validation', 'test']. - Returns: A metric function with the following API: ```metrics_fn(logits, - batch)``` - """ - del split # For all splits, we return the same metric functions. 
- return functools.partial( - semantic_segmentation_metrics_function, - target_is_onehot=self.dataset_meta_data.get('target_is_onehot', False), - metrics=_SEMANTIC_SEGMENTATION_METRICS_UNC) - - -def _replace_dict(model: PyTree, - restored: PyTree, - restored_model_cfg: ml_collections, - gs_vit: Optional[Tuple] = None, - gs_segvit: Optional[Tuple] = None, - ckpt_prefix_path: Optional[List[str]] = None, - model_prefix_path: Optional[List[str]] = None, - name_mapping: Optional[Mapping[str, str]] = None, - skip_regex: Optional[str] = None) -> PyTree: - """Replaces values in model dictionary with restored ones from checkpoint. - - Include changes to facilitate loading of pretrained variables - from an encoder w a token classifier. - """ - - model = flax.core.unfreeze(model) # pytype: disable=wrong-arg-types - restored = flax.core.unfreeze(restored) # pytype: disable=wrong-arg-types - - if ckpt_prefix_path: - for p in ckpt_prefix_path: - restored = restored[p] - - if model_prefix_path: - for p in reversed(model_prefix_path): - restored = {p: restored} - - # Flatten nested parameters to a dict of str -> tensor. Keys are tuples - # from the path in the nested dictionary to the specific tensor. E.g., - # {'a1': {'b1': t1, 'b2': t2}, 'a2': t3} - # -> {('a1', 'b1'): t1, ('a1', 'b2'): t2, ('a2',): t3}. - restored_flat = flax.traverse_util.flatten_dict( - dict(restored), keep_empty_nodes=True) - model_flat = flax.traverse_util.flatten_dict( - dict(model), keep_empty_nodes=True) - - for m_key, m_params in restored_flat.items(): - # pytype: disable=attribute-error - for name, to_replace in name_mapping.items(): - m_key = tuple(to_replace if k == name else k for k in m_key) - # pytype: enable=attribute-error - m_key_str = '/'.join(m_key) - if m_key not in model_flat: - logging.warning( - '%s in checkpoint doesn\'t exist in model. 
Skip.', m_key_str) - continue - if skip_regex and re.findall(skip_regex, m_key_str): - logging.info('Skip loading parameter %s.', m_key_str) - continue - logging.info('Loading %s from checkpoint into model', m_key_str) - - # resize positional embeddings given token - if 'posembed_input' in m_key: # might need resolution change - # the backbone should be pose segmenter - # vit_posemb = m_params['posembed_input']['pos_embedding'] - vit_posemb = m_params - # segvit_posemb = backbone[m_key]['posembed_input']['pos_embedding'] - segvit_posemb = model_flat[m_key] - - if vit_posemb.shape != segvit_posemb.shape: - # rescale the grid of pos, embeddings: param shape is (1,N,768) - segvit_ntok = segvit_posemb.shape[1] - if restored_model_cfg.classifier == 'token': - # the first token is the CLS token - vit_posemb = vit_posemb[0, 1:] - else: - vit_posemb = vit_posemb[0] - logging.info('Resized variant: %s to %s', vit_posemb.shape, - segvit_posemb.shape) - if gs_vit is None: - gs_vit = [int(np.sqrt(vit_posemb.shape[0])), int(np.sqrt(vit_posemb.shape[0]))] - - assert np.prod(gs_vit) == vit_posemb.shape[0] - assert np.prod(gs_segvit) == segvit_ntok - if gs_vit != gs_segvit: # we need resolution change - logging.info('Grid-size from %s to %s', gs_vit, gs_segvit) - vit_posemb_grid = vit_posemb.reshape(gs_vit + [-1]) - zoom = (gs_segvit[0] / gs_vit[0], gs_segvit[1] / gs_vit[1], 1) - vit_posemb_grid = scipy.ndimage.zoom(vit_posemb_grid, zoom, order=1) - vit_posemb = vit_posemb_grid.reshape(1, np.prod(gs_segvit), -1) - else: # just the cls token was extra and we are now fine - vit_posemb = np.expand_dims(vit_posemb, axis=0) - m_params = vit_posemb - - assert model_flat[m_key].shape == m_params.shape - model_flat[m_key] = m_params - - return flax.core.freeze(flax.traverse_util.unflatten_dict(model_flat)) - diff --git a/experimental/cityscapes/custom_segmentation_eval.py b/experimental/cityscapes/custom_segmentation_eval.py deleted file mode 100644 index c7eb63b6c..000000000 --- 
a/experimental/cityscapes/custom_segmentation_eval.py +++ /dev/null @@ -1,464 +0,0 @@ -""" -Custom segmentation_trainer.py - -# cannot import train_step, eval_step due to tuple segmenter output in ub implementation -Minor changes to account for ub models which ouput a tuple (logits, dict) -""" - -import functools -from typing import Any, Callable, Dict, Tuple, Optional, Type, Sequence - -import flax.linen as nn -import jax -import jax.numpy as jnp -import jax.profiler -import ml_collections -import numpy as np -from absl import logging -from clu import metric_writers -from clu import periodic_actions -from flax import jax_utils -from jax.experimental.optimizers import clip_grads - -from scenic.dataset_lib import dataset_utils -from scenic.model_lib.base_models import base_model -from scenic.train_lib import lr_schedules -from scenic.train_lib import optimizers -from scenic.train_lib import train_utils -import h5py -import os -# instead of importing we use local functions -# from scenic.train_lib.segmentation_trainer import train_step, eval_step, _draw_side_by_side -Batch = Dict[str, jnp.ndarray] -MetricFn = Callable[[jnp.ndarray, Dict[str, jnp.ndarray]], - Dict[str, Tuple[float, int]]] -LossFn = Callable[[jnp.ndarray, Batch, Optional[jnp.ndarray]], float] - -from scenic.train_lib.segmentation_trainer import _draw_side_by_side, get_confusion_matrix -from flax.training.checkpoints import restore_checkpoint as flax_restore_checkpoint -from scenic.train_lib import pretrain_utils - -from pretrainer_utils import load_bb_config -from pathlib import Path - -def eval_step1( - *, - flax_model: nn.Module, - train_state: train_utils.TrainState, - batch: Batch, - metrics_fn: MetricFn, - debug: Optional[bool] = False -) -> Tuple[Batch, jnp.ndarray, Dict[str, Tuple[float, int]], jnp.ndarray]: - """Runs a single step of training. - - Note that in this code, the buffer of the second argument (batch) is donated - to the computation. 
- - Assumed API of metrics_fn is: - ```metrics = metrics_fn(logits, batch) - where batch is yielded by the batch iterator, and metrics is a dictionary - mapping metric name to a vector of per example measurements. eval_step will - aggregate (by summing) all per example measurements and divide by the - aggregated normalizers. For each given metric we compute: - 1/N sum_{b in batch_iter} metric(b), where N is the sum of normalizer - over all batches. - - Args: - flax_model: A Flax model. - train_state: TrainState, the state of training including the current - global_step, model_state, rng, and optimizer. The buffer of this argument - can be donated to the computation. - batch: A single batch of data. a metrics function, that given logits and - batch of data, calculates the metrics as well as the loss. - metrics_fn: A metrics function, that given logits and batch of data, - calculates the metrics as well as the loss. - debug: Whether the debug mode is enabled during evaluation. - `debug=True` enables model specific logging/storing some values using - jax.host_callback. - - Returns: - Batch, predictions and calculated metrics. - """ - variables = { - 'params': train_state.optimizer.target, - **train_state.model_state - } - (logits, _) = flax_model.apply( - variables, batch['inputs'], train=False, mutable=False, debug=debug) - - metrics = metrics_fn(logits, batch) - - confusion_matrix = get_confusion_matrix( - labels=batch['label'], logits=logits, batch_mask=batch['batch_mask']) - - # Collect predictions and batches from all hosts. 
- predictions = jnp.argmax(logits, axis=-1) - predictions = jax.lax.all_gather(predictions, 'batch') - batch = jax.lax.all_gather(batch, 'batch') - confusion_matrix = jax.lax.all_gather(confusion_matrix, 'batch') - - return batch, predictions, metrics, confusion_matrix - - -def eval1( - *, - rng: jnp.ndarray, - config: ml_collections.ConfigDict, - model_cls: Type[base_model.BaseModel], - dataset: dataset_utils.Dataset, - workdir: str, - writer: metric_writers.MetricWriter, -) -> Tuple[train_utils.TrainState, Dict[str, Any], Dict[str, Any]]: - """Main training loop lives in this function. - - Given the model class and dataset, it prepares the items needed to run the - training, including the TrainState. - - Args: - rng: Jax rng key. - config: Configurations of the experiment. - model_cls: Model class; A model has a flax_module, a loss_fn, and a - metrics_fn associated with it. - dataset: The dataset that has train_iter, eval_iter, meta_data, and - optionally, test_iter. - workdir: Directory for checkpointing. - writer: CLU metrics writer instance. - - Returns: - train_state that has the state of training (including current - global_step, model_state, rng, and the optimizer), train_summary - and eval_summary which are dict of metrics. These outputs are used for - regression testing. - - Timeline: - - Updated from scenic.train_lib.segmentation_trainer.train - """ - lead_host = jax.process_index() == 0 - # Build the loss_fn, metrics, and flax_model. - model = model_cls(config, dataset.meta_data) - - # Initialize model. - rng, init_rng = jax.random.split(rng) - (params, model_state, num_trainable_params, - gflops) = train_utils.initialize_model( - model_def=model.flax_model, - input_spec=[(dataset.meta_data['input_shape'], - dataset.meta_data.get('input_dtype', jnp.float32))], - config=config, - rngs=init_rng) - - # Create optimizer. - # We jit this, such that the arrays that are created are created on the same - # device as the input is, in this case the CPU. 
Else they'd be on device[0]. - optimizer = jax.jit( - optimizers.get_optimizer(config).create, backend='cpu')( - params) - rng, train_rng = jax.random.split(rng) - train_state = train_utils.TrainState( - global_step=0, - optimizer=optimizer, - model_state=model_state, - rng=train_rng, - accum_train_time=0) - start_step = train_state.global_step - - # Load pretrained backbone - if start_step == 0 and config.get('load_pretrained_backbone', False): - # TODO(kellybuchanan): check out partial loader in - # https://github.com/google/uncertainty-baselines/commit/083b1dcc52bb1964f8917d15552ece8848d582ae# - - bb_checkpoint_path = config.pretrained_backbone_configs.get('checkpoint_path') - checkpoint_format = config.pretrained_backbone_configs.get('checkpoint_format', 'ub') - # bb_model_cfg_file = config.pretrained_backbone_configs.get('checkpoint_cfg') - - # Loader from scenic - # cannot restore using flax - # Mathias suggested to try flax_restore_checkpoint - # bb_train_state = flax_restore_checkpoint(bb_checkpoint_path, target=None) - # but we get an error *** msgpack.exceptions.ExtraData: unpack(b) received extra data. - - # TODO(kellybuchanan): read config file directly from bb_model_cfg_file - restored_model_cfg = load_bb_config(config) - - if checkpoint_format == 'ub': - # import pdb; pdb.set_trace() - # load params from checkpoint - bb_train_state = pretrain_utils.convert_bigvision_to_scenic_checkpoint( - checkpoint_path=bb_checkpoint_path, - convert_to_linen=False) - - # option 1: failed as variables are a frozen dictionary - # could be used with flax.core.unfreeze, flax.core.freeze - train_state = model.init_backbone_from_train_state(train_state, - bb_train_state, - restored_model_cfg, - model_prefix_path=['backbone']) - - # option2: it fails for embeddings as this mode - # doesn't allow to specify loaded params . 
- # model_prefix_path = ['backbone'] - # train_state = pretrain_utils.init_from_pretrain_state( - # train_state, bb_train_state, model_prefix_path=model_prefix_path) - - - else: - raise NotImplementedError("") - - elif start_step == 0: - logging.info('Not restoring from any pretrained_backbone.') - - if config.checkpoint: - train_state, start_step = train_utils.restore_checkpoint(workdir, train_state) - else: - logging.info('Not restoring from any checkpoints.') - - # Replicate the optimzier, state, and rng. - train_state = jax_utils.replicate(train_state) - del params # Do not keep a copy of the initial params. - - # Calculate the total number of training steps. - total_steps, steps_per_epoch = train_utils.get_num_training_steps( - config, dataset.meta_data) - # Get learning rate scheduler. - #learning_rate_fn = lr_schedules.get_learning_rate_fn(config) - - ############### EVALUATION CODE ################# - eval_step_pmapped = jax.pmap( - functools.partial( - eval_step1, - flax_model=model.flax_model, - metrics_fn=model.get_metrics_fn_unc('validation'), - debug=config.debug_eval), - axis_name='batch', - # We can donate the eval_batch's buffer. - ) - - # Ceil rounding such that we include the last incomplete batch. - total_eval_steps = int( - np.ceil(dataset.meta_data['num_eval_examples'] / config.batch_size)) - steps_per_eval = config.get('steps_per_eval') or total_eval_steps - - batch_size = config.batch_size - #num_eval_examples = dataset.meta_data['num_eval_examples'] - num_eval_examples = int(steps_per_eval * config.batch_size) - - def evaluate(train_state: train_utils.TrainState, - step: int) -> Dict[str, Any]: - eval_metrics = [] - eval_all_confusion_mats = [] - # Sync model state across replicas. 
- train_state = train_utils.sync_model_state_across_replicas(train_state) - - def to_cpu(x): - return jax.device_get(dataset_utils.unshard(jax_utils.unreplicate(x))) - - for step_ in range(steps_per_eval): - eval_batch = next(dataset.valid_iter) - e_batch, \ - e_predictions, \ - e_metrics, \ - confusion_matrix = eval_step_pmapped(train_state=train_state, batch=eval_batch) - - eval_metrics.append(train_utils.unreplicate_and_get(e_metrics)) - - # Evaluate global metrics on one of the hosts (lead_host), but given - # intermediate values collected from all hosts. - if lead_host and global_metrics_fn is not None: - # Collect data to be sent for computing global metrics. - eval_all_confusion_mats.append(to_cpu(confusion_matrix)) - - eval_global_metrics_summary = {} - if lead_host and global_metrics_fn is not None: - eval_global_metrics_summary = global_metrics_fn(eval_all_confusion_mats, - dataset.meta_data) - ############### LOG EVAL SUMMARY ############### - #eval_summary = train_utils.log_eval_summary( - eval_summary = log_eval_summary( - - step=step, - eval_metrics=eval_metrics, - extra_eval_summary=eval_global_metrics_summary, - # writer=writer - ) - """ - eval_summary = train_utils.log_eval_summary( - step=step, - eval_metrics=eval_metrics, - extra_eval_summary=eval_global_metrics_summary, - # writer=writer - ) - # Visualize val predictions for one batch: - if lead_host: - images = _draw_side_by_side(to_cpu(e_batch), to_cpu(e_predictions)) - example_viz = { - f'val/example_{i}': image[None, ...] 
for i, image in enumerate(images) - } - writer.write_images(step, example_viz) - - writer.flush() - """ - #eval_summary = 0 - del eval_metrics - return eval_summary - - log_eval_steps = config.get('log_eval_steps') or steps_per_epoch - if not log_eval_steps: - raise ValueError("'log_eval_steps' should be specified in the config.") - log_summary_steps = config.get('log_summary_steps') or log_eval_steps - checkpoint_steps = config.get('checkpoint_steps') or log_eval_steps - - train_metrics, extra_training_logs = [], [] - train_summary, eval_summary = None, None - global_metrics_fn = model.get_global_metrics_fn() # pytype: disable=attribute-error - - chrono = train_utils.Chrono( - first_step=start_step, - total_steps=total_steps, - steps_per_epoch=steps_per_epoch, - global_bs=config.batch_size, - accum_train_time=int(jax_utils.unreplicate(train_state.accum_train_time))) - - logging.info('Starting training loop at step %d.', start_step + 1) - report_progress = periodic_actions.ReportProgress( - num_train_steps=total_steps, - #writer=writer - ) - hooks = [report_progress] - if config.get('xprof', True) and lead_host: - hooks.append(periodic_actions.Profile(num_profile_steps=5, logdir=workdir)) - - if start_step == 0: - raise NotImplementedError("start step should be larger") - step0_log = {'num_trainable_params': num_trainable_params} - if gflops: - step0_log['gflops'] = gflops - writer.write_scalars(1, step0_log) - - for step in range(start_step + 1, start_step + 2): - with jax.profiler.StepTraceContext('train', sfLtep_num=step): - train_batch = next(dataset.train_iter) - - # train_state, t_metrics, lr, train_predictions = train_step_pmapped( - # train_state=train_state, batch=train_batch) - # This will accumulate metrics in TPU memory up to the point that we log - # them. This is no problem for small metrics but may be a problem for - # large (e.g. segmentation) metrics. 
An alternative is to set - # `log_summary_steps` to a small number, or to use - # `train_utils.unreplicate_and_get` here instead of right before writing - # summaries, but that means in each step, we have data transfer between - # tpu and host, which might slow down the training. - # train_metrics.append(t_metrics) - # Additional training logs: learning rate: - # extra_training_logs.append({'learning_rate': lr}) - - for h in hooks: - h(step) - chrono.pause() # Below are once-in-a-while ops -> pause. - """ - if step % log_summary_steps == 0 or (step == total_steps): - ############### LOG TRAIN SUMMARY ############### - if lead_host: - chrono.tick(step, writer=writer) - # Visualize segmentations using side-by-side gt-pred images: - images = _draw_side_by_side( - jax.device_get(dataset_utils.unshard(train_batch)), - jax.device_get(dataset_utils.unshard(train_predictions))) - example_viz = { - f'train/example_{i}': image[None, ...] - for i, image in enumerate(images) - } - writer.write_images(step, example_viz) - - train_summary = train_utils.log_train_summary( - step=step, - train_metrics=jax.tree_map(train_utils.unreplicate_and_get, - train_metrics), - extra_training_logs=jax.tree_map(train_utils.unreplicate_and_get, - extra_training_logs), - writer=writer) - # Reset metric accumulation for next evaluation cycle. - train_metrics, extra_training_logs = [], [] - """ - #if (step % log_eval_steps == 0) or (step == total_steps): - with report_progress.timed('eval'): - # Sync model state across replicas (in case of having model state, e.g. - # batch statistic when using batch norm). - train_state = train_utils.sync_model_state_across_replicas(train_state) - eval_summary = evaluate(train_state, step) - """ - if ((step % checkpoint_steps == 0 and step > 0) or - (step == total_steps)) and config.checkpoint: - ################### CHECK POINTING ########################## - with report_progress.timed('checkpoint'): - # Sync model state across replicas. 
- train_state = train_utils.sync_model_state_across_replicas(train_state) - if lead_host: - train_state.replace( # pytype: disable=attribute-error - accum_train_time=chrono.accum_train_time) - train_utils.save_checkpoint(workdir, train_state) - """ - chrono.resume() # Un-pause now. - - # Wait until computations are done before exiting. - jax.random.normal(jax.random.PRNGKey(0), ()).block_until_ready() - # Return the train and eval summary after last step for regresesion testing. - return train_state, train_summary, eval_summary - - -def log_eval_summary(step: int, - eval_metrics: Sequence[Dict[str, Tuple[float, int]]], - extra_eval_summary: Optional[Dict[str, Any]] = None, - summary_writer: Optional[Any] = None, - metrics_normalizer_fn: Optional[ - Callable[[Dict[str, Tuple[float, int]], str], - Dict[str, float]]] = None, - prefix: str = 'valid', - key_separator: str = '_') -> Dict[str, float]: - """Computes and logs eval metrics. - - Args: - step: Current step. - eval_metrics: Sequence of dictionaries of calculated metrics. - extra_eval_summary: A dict containing summaries that are already ready to be - logged, e.g. global metrics from eval set, like precision/recall. - summary_writer: Summary writer object. - metrics_normalizer_fn: Used for normalizing metrics. The api for - this function is: `new_metrics_dict = metrics_normalizer_fn( metrics_dict, - split)`. If set to None, we use the normalize_metrics_summary which uses - the normalizer paired with each metric to normalize it. - prefix: str; Prefix added to the name of the summaries writen by this - function. - key_separator: Separator added between the prefix and key. - - Returns: - eval summary: A dictionary of metrics. - """ - eval_metrics = train_utils.stack_forest(eval_metrics) - - # Compute the sum over all examples in all batches. - eval_metrics_summary = jax.tree_map(lambda x: x.sum(), eval_metrics) - # Normalize metrics by the total number of exampels. 
- metrics_normalizer_fn = ( - metrics_normalizer_fn or train_utils.normalize_metrics_summary) - eval_metrics_summary = metrics_normalizer_fn(eval_metrics_summary, 'eval') - # If None, set to an empty dictionary. - extra_eval_summary = extra_eval_summary or {} - - if jax.process_index() == 0: - message = '' - for key, val in eval_metrics_summary.items(): - message += f'{key}: {val} | ' - for key, val in extra_eval_summary.items(): - message += f'{key}: {val} | ' - logging.info('step: %d -- %s -- {%s}', step, prefix, message) - - if summary_writer is not None: - for key, val in eval_metrics_summary.items(): - summary_writer.scalar(f'{prefix}{key_separator}{key}', val, step) - for key, val in extra_eval_summary.items(): - summary_writer.scalar(f'{prefix}{key_separator}{key}', val, step) - summary_writer.flush() - - # Add extra_eval_summary to the returned eval_summary. - eval_metrics_summary.update(extra_eval_summary) - return eval_metrics_summary diff --git a/experimental/cityscapes/custom_segmentation_trainer.py b/experimental/cityscapes/custom_segmentation_trainer.py deleted file mode 100644 index 770948f94..000000000 --- a/experimental/cityscapes/custom_segmentation_trainer.py +++ /dev/null @@ -1,524 +0,0 @@ -""" -Custom segmentation_trainer.py - -# cannot import train_step, eval_step due to tuple segmenter output in ub implementation -Minor changes to account for ub models which ouput a tuple (logits, dict) -""" - -import functools -from typing import Any, Callable, Dict, Tuple, Optional, Type - -import flax.linen as nn -import jax -import jax.numpy as jnp -import jax.profiler -import ml_collections -import numpy as np -from absl import logging -from clu import metric_writers -from clu import periodic_actions -from flax import jax_utils -from jax.experimental.optimizers import clip_grads - -from scenic.dataset_lib import dataset_utils -from scenic.model_lib.base_models import base_model -from scenic.train_lib import lr_schedules -from scenic.train_lib import 
optimizers -from scenic.train_lib import train_utils - -# instead of importing we use local functions -# from scenic.train_lib.segmentation_trainer import train_step, eval_step, _draw_side_by_side -Batch = Dict[str, jnp.ndarray] -MetricFn = Callable[[jnp.ndarray, Dict[str, jnp.ndarray]], - Dict[str, Tuple[float, int]]] -LossFn = Callable[[jnp.ndarray, Batch, Optional[jnp.ndarray]], float] - -from scenic.train_lib.segmentation_trainer import _draw_side_by_side, get_confusion_matrix -from flax.training.checkpoints import restore_checkpoint as flax_restore_checkpoint -from scenic.train_lib import pretrain_utils - -from pretrainer_utils import load_bb_config - - -def train_step( - *, - flax_model: nn.Module, - train_state: train_utils.TrainState, - batch: Batch, - learning_rate_fn: Callable[[int], float], - loss_fn: LossFn, - metrics_fn: MetricFn, - config: ml_collections.ConfigDict, - debug: Optional[bool] = False -) -> Tuple[train_utils.TrainState, Dict[str, Tuple[float, int]], float, - jnp.ndarray]: - """Runs a single step of training. - - Given the state of the training and a batch of data, computes - the loss and updates the parameters of the model. - - Note that in this code, the buffers of the first (train_state) and second - (batch) arguments are donated to the computation. - - Args: - flax_model: A Flax model. - train_state: The state of training including the current - global_step, model_state, rng, and optimizer. The buffer of this argument - can be donated to the computation. - batch: A single batch of data. The buffer of this argument can be donated to - the computation. - learning_rate_fn: learning rate scheduler which give the global_step - generates the learning rate. - loss_fn: A loss function that given logits, a batch, and parameters of the - model calculates the loss. - metrics_fn: A metrics function that given logits and batch of data, - calculates the metrics as well as the loss. - config: Configurations of the experiment. 
- debug: Whether the debug mode is enabled during training. - `debug=True` enables model specific logging/storing some values using - jax.host_callback. - - Returns: - Updated state of training, computed metrics, learning rate, and predictions - for logging. - """ - new_rng, rng = jax.random.split(train_state.rng) - # Bind the rng to the host/device we are on. - dropout_rng = train_utils.bind_rng_to_host_device( - rng, axis_name='batch', bind_to='device') - - def training_loss_fn(params): - variables = {'params': params, **train_state.model_state} - (logits, _), new_model_state = flax_model.apply( - variables, - batch['inputs'], - mutable=['batch_stats'], - train=True, - rngs={'dropout': dropout_rng}, - debug=debug) - loss = loss_fn(logits, batch, variables['params']) - return loss, (new_model_state, logits) - - compute_gradient_fn = jax.value_and_grad(training_loss_fn, has_aux=True) - step = train_state.global_step - lr = learning_rate_fn(step) - (train_cost, - (new_model_state, - logits)), grad = compute_gradient_fn(train_state.optimizer.target) - - del train_cost - # Re-use same axis_name as in the call to `pmap(...train_step...)` below. - grad = jax.lax.pmean(grad, axis_name='batch') - - if config.get('max_grad_norm', None) is not None: - grad = clip_grads(grad, config.max_grad_norm) - - new_optimizer = train_state.optimizer.apply_gradient(grad, learning_rate=lr) - - # Explicit weight decay, if necessary. 
- if config.get('explicit_weight_decay', None) is not None: - new_optimizer = new_optimizer.replace( - target=optimizers.tree_map_with_names( - functools.partial( - optimizers.decay_weight_fn, - lr=lr, - decay=config.explicit_weight_decay), - new_optimizer.target, - match_name_fn=lambda name: 'kernel' in name)) - - metrics = metrics_fn(logits, batch) - new_train_state = train_state.replace( # pytype: disable=attribute-error - global_step=step + 1, - optimizer=new_optimizer, - model_state=new_model_state, - rng=new_rng) - return new_train_state, metrics, lr, jnp.argmax(logits, axis=-1) - - -def eval_step( - *, - flax_model: nn.Module, - train_state: train_utils.TrainState, - batch: Batch, - metrics_fn: MetricFn, - debug: Optional[bool] = False -) -> Tuple[Batch, jnp.ndarray, Dict[str, Tuple[float, int]], jnp.ndarray]: - """Runs a single step of training. - - Note that in this code, the buffer of the second argument (batch) is donated - to the computation. - - Assumed API of metrics_fn is: - ```metrics = metrics_fn(logits, batch) - where batch is yielded by the batch iterator, and metrics is a dictionary - mapping metric name to a vector of per example measurements. eval_step will - aggregate (by summing) all per example measurements and divide by the - aggregated normalizers. For each given metric we compute: - 1/N sum_{b in batch_iter} metric(b), where N is the sum of normalizer - over all batches. - - Args: - flax_model: A Flax model. - train_state: TrainState, the state of training including the current - global_step, model_state, rng, and optimizer. The buffer of this argument - can be donated to the computation. - batch: A single batch of data. a metrics function, that given logits and - batch of data, calculates the metrics as well as the loss. - metrics_fn: A metrics function, that given logits and batch of data, - calculates the metrics as well as the loss. - debug: Whether the debug mode is enabled during evaluation. 
- `debug=True` enables model specific logging/storing some values using - jax.host_callback. - - Returns: - Batch, predictions and calculated metrics. - """ - variables = { - 'params': train_state.optimizer.target, - **train_state.model_state - } - (logits, _) = flax_model.apply( - variables, batch['inputs'], train=False, mutable=False, debug=debug) - # upscale the targets - - # labels = jnp.tile(labels, (ens_size, 1)) - # https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/batchensemble.py#L391 - # loss_fn = getattr(train_utils, config.get('loss', 'sigmoid_xent')) - metrics = metrics_fn(logits, batch) - - confusion_matrix = get_confusion_matrix( - labels=batch['label'], logits=logits, batch_mask=batch['batch_mask']) - - # Collect predictions and batches from all hosts. - predictions = jnp.argmax(logits, axis=-1) - predictions = jax.lax.all_gather(predictions, 'batch') - batch = jax.lax.all_gather(batch, 'batch') - confusion_matrix = jax.lax.all_gather(confusion_matrix, 'batch') - - return batch, predictions, metrics, confusion_matrix - - -def train( - *, - rng: jnp.ndarray, - config: ml_collections.ConfigDict, - model_cls: Type[base_model.BaseModel], - dataset: dataset_utils.Dataset, - workdir: str, - writer: metric_writers.MetricWriter, -) -> Tuple[train_utils.TrainState, Dict[str, Any], Dict[str, Any]]: - """Main training loop lives in this function. - - Given the model class and dataset, it prepares the items needed to run the - training, including the TrainState. - - Args: - rng: Jax rng key. - config: Configurations of the experiment. - model_cls: Model class; A model has a flax_module, a loss_fn, and a - metrics_fn associated with it. - dataset: The dataset that has train_iter, eval_iter, meta_data, and - optionally, test_iter. - workdir: Directory for checkpointing. - writer: CLU metrics writer instance. 
- - Returns: - train_state that has the state of training (including current - global_step, model_state, rng, and the optimizer), train_summary - and eval_summary which are dict of metrics. These outputs are used for - regression testing. - - Timeline: - - Updated from scenic.train_lib.segmentation_trainer.train - """ - lead_host = jax.process_index() == 0 - # Build the loss_fn, metrics, and flax_model. - model = model_cls(config, dataset.meta_data) - - # Initialize model. - rng, init_rng = jax.random.split(rng) - (params, model_state, num_trainable_params, - gflops) = train_utils.initialize_model( - model_def=model.flax_model, - input_spec=[(dataset.meta_data['input_shape'], - dataset.meta_data.get('input_dtype', jnp.float32))], - config=config, - rngs=init_rng) - - # Create optimizer. - # We jit this, such that the arrays that are created are created on the same - # device as the input is, in this case the CPU. Else they'd be on device[0]. - optimizer = jax.jit( - optimizers.get_optimizer(config).create, backend='cpu')( - params) - rng, train_rng = jax.random.split(rng) - train_state = train_utils.TrainState( - global_step=0, - optimizer=optimizer, - model_state=model_state, - rng=train_rng, - accum_train_time=0) - start_step = train_state.global_step - - # Load pretrained backbone - if start_step == 0 and config.get('load_pretrained_backbone', False): - # TODO(kellybuchanan): check out partial loader in - # https://github.com/google/uncertainty-baselines/commit/083b1dcc52bb1964f8917d15552ece8848d582ae# - import pdb; pdb.set_trace() - bb_checkpoint_path = config.pretrained_backbone_configs.get('checkpoint_path') - checkpoint_format = config.pretrained_backbone_configs.get('checkpoint_format', 'ub') - # bb_model_cfg_file = config.pretrained_backbone_configs.get('checkpoint_cfg') - - # Loader from scenic - # cannot restore using flax - # Mathias suggested to try flax_restore_checkpoint - # bb_train_state = flax_restore_checkpoint(bb_checkpoint_path, target=None) - 
# but we get an error *** msgpack.exceptions.ExtraData: unpack(b) received extra data. - - # TODO(kellybuchanan): read config file directly from bb_model_cfg_file - restored_model_cfg = load_bb_config(config) - - if checkpoint_format == 'ub': - # import pdb; pdb.set_trace() - # load params from checkpoint - bb_train_state = pretrain_utils.convert_bigvision_to_scenic_checkpoint( - checkpoint_path=bb_checkpoint_path, - convert_to_linen=False) - - # option 1: failed as variables are a frozen dictionary - # could be used with flax.core.unfreeze, flax.core.freeze - train_state = model.init_backbone_from_train_state(train_state, - bb_train_state, - restored_model_cfg, - model_prefix_path=['backbone']) - - # option2: it fails for embeddings as this mode - # doesn't allow to specify loaded params . - # model_prefix_path = ['backbone'] - # train_state = pretrain_utils.init_from_pretrain_state( - # train_state, bb_train_state, model_prefix_path=model_prefix_path) - - - else: - raise NotImplementedError("") - - elif start_step == 0: - logging.info('Not restoring from any pretrained_backbone.') - - if config.checkpoint: - train_state, start_step = train_utils.restore_checkpoint( - workdir, train_state) - else: - logging.info('Not restoring from any checkpoints.') - - # Replicate the optimzier, state, and rng. - train_state = jax_utils.replicate(train_state) - del params # Do not keep a copy of the initial params. - - # Calculate the total number of training steps. - total_steps, steps_per_epoch = train_utils.get_num_training_steps( - config, dataset.meta_data) - # Get learning rate scheduler. 
- learning_rate_fn = lr_schedules.get_learning_rate_fn(config) - - train_step_pmapped = jax.pmap( - functools.partial( - train_step, - flax_model=model.flax_model, - learning_rate_fn=learning_rate_fn, - loss_fn=model.loss_function, - metrics_fn=model.get_metrics_fn('train'), - config=config, - debug=config.debug_train), - axis_name='batch', - # We can donate both buffers of train_state and train_batch. - donate_argnums=(0, 1), - ) - - ############### EVALUATION CODE ################# - - eval_step_pmapped = jax.pmap( - functools.partial( - eval_step, - flax_model=model.flax_model, - metrics_fn=model.get_metrics_fn('validation'), - debug=config.debug_eval), - axis_name='batch', - # We can donate the eval_batch's buffer. - ) - - # Ceil rounding such that we include the last incomplete batch. - total_eval_steps = int( - np.ceil(dataset.meta_data['num_eval_examples'] / config.batch_size)) - steps_per_eval = config.get('steps_per_eval') or total_eval_steps - - def evaluate(train_state: train_utils.TrainState, - step: int) -> Dict[str, Any]: - eval_metrics = [] - eval_all_confusion_mats = [] - # Sync model state across replicas. - train_state = train_utils.sync_model_state_across_replicas(train_state) - def to_cpu(x): - return jax.device_get(dataset_utils.unshard(jax_utils.unreplicate(x))) - for _ in range(steps_per_eval): - eval_batch = next(dataset.valid_iter) - e_batch, e_predictions, e_metrics, confusion_matrix = eval_step_pmapped( - train_state=train_state, batch=eval_batch) - eval_metrics.append(train_utils.unreplicate_and_get(e_metrics)) - # Evaluate global metrics on one of the hosts (lead_host), but given - # intermediate values collected from all hosts. - if lead_host and global_metrics_fn is not None: - # Collect data to be sent for computing global metrics. 
- eval_all_confusion_mats.append(to_cpu(confusion_matrix)) - - eval_global_metrics_summary = {} - if lead_host and global_metrics_fn is not None: - eval_global_metrics_summary = global_metrics_fn(eval_all_confusion_mats, - dataset.meta_data) - - ############### LOG EVAL SUMMARY ############### - eval_summary = train_utils.log_eval_summary( - step=step, - eval_metrics=eval_metrics, - extra_eval_summary=eval_global_metrics_summary, - writer=writer) - # Visualize val predictions for one batch: - if lead_host: - images = _draw_side_by_side(to_cpu(e_batch), to_cpu(e_predictions)) - example_viz = { - f'val/example_{i}': image[None, ...] for i, image in enumerate(images) - } - writer.write_images(step, example_viz) - - writer.flush() - del eval_metrics - return eval_summary - - log_eval_steps = config.get('log_eval_steps') or steps_per_epoch - if not log_eval_steps: - raise ValueError("'log_eval_steps' should be specified in the config.") - log_summary_steps = config.get('log_summary_steps') or log_eval_steps - checkpoint_steps = config.get('checkpoint_steps') or log_eval_steps - - train_metrics, extra_training_logs = [], [] - train_summary, eval_summary = None, None - global_metrics_fn = model.get_global_metrics_fn() # pytype: disable=attribute-error - - chrono = train_utils.Chrono( - first_step=start_step, - total_steps=total_steps, - steps_per_epoch=steps_per_epoch, - global_bs=config.batch_size, - accum_train_time=int(jax_utils.unreplicate(train_state.accum_train_time))) - - logging.info('Starting training loop at step %d.', start_step + 1) - report_progress = periodic_actions.ReportProgress( - num_train_steps=total_steps, writer=writer) - hooks = [report_progress] - if config.get('xprof', True) and lead_host: - hooks.append(periodic_actions.Profile(num_profile_steps=5, logdir=workdir)) - - if start_step == 0: - step0_log = {'num_trainable_params': num_trainable_params} - if gflops: - step0_log['gflops'] = gflops - writer.write_scalars(1, step0_log) - - # Early 
stopping flags - best_opt_accuracy = -1 - best_epoch = 1 - current_epoch=1 - force_out = 0 - early_stopping_patience = config.get('early_stopping_patience') or 20 - - for step in range(start_step + 1, total_steps + 1): - with jax.profiler.StepTraceContext('train', sfLtep_num=step): - train_batch = next(dataset.train_iter) - train_state, t_metrics, lr, train_predictions = train_step_pmapped( - train_state=train_state, batch=train_batch) - # This will accumulate metrics in TPU memory up to the point that we log - # them. This is no problem for small metrics but may be a problem for - # large (e.g. segmentation) metrics. An alternative is to set - # `log_summary_steps` to a small number, or to use - # `train_utils.unreplicate_and_get` here instead of right before writing - # summaries, but that means in each step, we have data transfer between - # tpu and host, which might slow down the training. - train_metrics.append(t_metrics) - # Additional training logs: learning rate: - extra_training_logs.append({'learning_rate': lr}) - - for h in hooks: - h(step) - chrono.pause() # Below are once-in-a-while ops -> pause. - - if step % log_summary_steps == 0 or (step == total_steps): - ############### LOG TRAIN SUMMARY ############### - if lead_host: - chrono.tick(step, writer=writer) - # Visualize segmentations using side-by-side gt-pred images: - images = _draw_side_by_side( - jax.device_get(dataset_utils.unshard(train_batch)), - jax.device_get(dataset_utils.unshard(train_predictions))) - example_viz = { - f'train/example_{i}': image[None, ...] - for i, image in enumerate(images) - } - writer.write_images(step, example_viz) - - train_summary = train_utils.log_train_summary( - step=step, - train_metrics=jax.tree_map(train_utils.unreplicate_and_get, - train_metrics), - extra_training_logs=jax.tree_map(train_utils.unreplicate_and_get, - extra_training_logs), - writer=writer) - - # Reset metric accumulation for next evaluation cycle. 
- train_metrics, extra_training_logs = [], [] - - if (step % log_eval_steps == 0) or (step == total_steps): - with report_progress.timed('eval'): - # Sync model state across replicas (in case of having model state, e.g. - # batch statistic when using batch norm). - train_state = train_utils.sync_model_state_across_replicas(train_state) - eval_summary = evaluate(train_state, step) - - # here check value - #current_epoch = int(step // log_eval_steps) - val_accuracy = eval_summary['accuracy'] - if val_accuracy >= best_opt_accuracy: - best_epoch = current_epoch - best_opt_accuracy = val_accuracy - # best_opt_repl = jax.device_get(opt_repl) - else: - logging.info( - msg=(f'Current val accuracy {val_accuracy} ' - f'(vs {best_opt_accuracy})')) - if current_epoch - best_epoch >= early_stopping_patience: - logging.info(msg='Early stopping, returning best opt!') - # force checkpoint - force_out = 1 - current_epoch+=1 - - if ((step % checkpoint_steps == 0 and step > 0) or - (step == total_steps) or (force_out == 1)) and config.checkpoint: - ################### CHECK POINTING ########################## - with report_progress.timed('checkpoint'): - # Sync model state across replicas. - train_state = train_utils.sync_model_state_across_replicas(train_state) - if lead_host: - train_state.replace( # pytype: disable=attribute-error - accum_train_time=chrono.accum_train_time) - train_utils.save_checkpoint(workdir, train_state) - - if force_out == 1: - # flag turned on due to early stopping - break - - chrono.resume() # Un-pause now. - - # Wait until computations are done before exiting. - jax.random.normal(jax.random.PRNGKey(0), ()).block_until_ready() - # Return the train and eval summary after last step for regresesion testing. 
- return train_state, train_summary, eval_summary - diff --git a/experimental/cityscapes/deterministic.py b/experimental/cityscapes/deterministic.py deleted file mode 100644 index 754a88656..000000000 --- a/experimental/cityscapes/deterministic.py +++ /dev/null @@ -1,187 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Train vit model on cityscapes. -See run_pretrained.sh for an example -""" - -import os -import sys - -# %% -import jax -# %% -import tensorflow as tf -# %% -from absl import app -from absl import flags -from absl import logging -from ml_collections.config_flags import config_flags -from tensorflow.io import gfile - -import custom_models -import custom_segmentation_trainer -# scenic dependencies for debugging -from scenic.train_lib import train_utils - -# import train_utils # local file import - -import wandb -import pathlib -import datetime - -#%% -config_flags.DEFINE_config_file( - 'config', None, 'Training configuration.', lock_config=True) -flags.DEFINE_string('output_dir', default=None, help='Work unit directory.') -flags.DEFINE_integer( - 'num_cores', default=None, help='Unused. How many devices being used.') -flags.DEFINE_boolean( - 'use_gpu', default=None, help='Unused. Whether or not running on GPU.') -flags.DEFINE_string('tpu', None, - 'Unused. Name of the TPU. 
Only used if use_gpu is False.') -flags.DEFINE_string('dataset_service_address', None, - 'Address of the tf.data service') -FLAGS = flags.FLAGS - - -def write_note(note): - if jax.process_index() == 0: - logging.info('NOTE: %s', note) - - -from clu import metric_writers - - -def run(config, workdir): - """Prepares model, and dataset for training. - - This creates summary directories, summary writers, model definition, and - builds datasets to be sent to the main training script. - - Args: - config: ConfigDict; Hyper parameters. - workdir: string; Root directory for the experiment. - - Returns: - The outputs of trainer.train(), which are train_state, train_summary, and - eval_summary. - """ - lead_host = jax.process_index() == 0 - # set up the train_dir and log_dir - gfile.makedirs(workdir) - #workdir = os.path.join(workdir, 'trial') - #gfile.makedirs(workdir) - - summary_writer = None - if lead_host and config.write_summary: - tensorboard_dir = os.path.join(workdir, 'tb_summaries') - gfile.makedirs(tensorboard_dir) - # summary_writer = tensorboard.SummaryWriter(tensorboard_dir) - summary_writer = metric_writers.SummaryWriter(tensorboard_dir) - - device_count = jax.device_count() - logging.info('device_count: %d', device_count) - logging.info('num_hosts : %d', jax.process_count()) - logging.info('host_id : %d', jax.process_index()) - - rng = jax.random.PRNGKey(config.rng_seed) - logging.info('rng: %s', rng) - - model_cls = custom_models.SegmenterSegmentationModel - - # ---------------------- - # Load dataset - # ---------------------- - data_rng, rng = jax.random.split(rng) - # set resource limit to debug in mac osx (see https://github.com/tensorflow/datasets/issues/1441) - if jax.process_index() == 0 and sys.platform == 'darwin': - import resource - low, high = resource.getrlimit(resource.RLIMIT_NOFILE) - resource.setrlimit(resource.RLIMIT_NOFILE, (high, high)) - write_note('Loading dataset...') - - # TODO: update num_classes - dataset = train_utils.get_dataset( - 
config, data_rng, dataset_service_address=FLAGS.dataset_service_address) - - return rng, model_cls, dataset, config, workdir, summary_writer - - -def main(config, output_dir): - - print('config') - print(config) - seed = config.get('rng_seed', 0) - rng = jax.random.PRNGKey(seed) - tf.random.set_seed(seed) - - # Wandb Setup - if config.use_wandb: - #pathlib.Path(config.wandb_dir).mkdir(parents=True, exist_ok=True) - gfile.makedirs(config.wandb_dir) - wandb_args = dict( - project=config.wandb_project, - entity=config.get('wandb_entity', 'ub_rdl_big_paper'), - dir=config.wandb_dir, - reinit=True, - name=config.wandb_exp_name, - group=config.wandb_exp_group, - sync_tensorboard=True) - wandb_run = wandb.init(**wandb_args) - wandb.config.update(FLAGS, allow_val_change=True) - output_dir = str( - os.path.join(output_dir, - config.wandb_exp_name - )) - else: - wandb_run = None - #output_dir = FLAGS.output_dir - - print('workdir ', output_dir) - rng, model_cls, dataset, config, workdir, summary_writer = run(config, output_dir) - print('workdir ', workdir) - - # ---------------------- - # Train function - # ---------------------- - train_fn = custom_segmentation_trainer.train - - train_state, train_summary, eval_summary = train_fn(rng=rng, model_cls=model_cls, dataset=dataset, - config=config, - workdir=output_dir, writer=summary_writer) - - print(train_summary) - - if wandb_run is not None: - wandb_run.finish() - return - - -if __name__ == '__main__': - # Adds jax flags to the program. - jax.config.config_with_absl() - - # TODO(dusenberrymw): Refactor `main` such that there is a `train_eval` - # function that returns values for tests and does not directly access flags, - # and then have `main` return None. - - def _main(unused_argv): - config = FLAGS.config - output_dir = FLAGS.output_dir - main(config, output_dir) - - app.run(_main) # Ignore the returned values from `main`. 
\ No newline at end of file diff --git a/experimental/cityscapes/deterministic_eval.py b/experimental/cityscapes/deterministic_eval.py deleted file mode 100644 index 2a5d5bc46..000000000 --- a/experimental/cityscapes/deterministic_eval.py +++ /dev/null @@ -1,161 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Train vit model on cityscapes. - -Step 1: aim to train model on cityscapes for 1 step -# Runs with - -""" - -import os -import sys - -# %% -import jax -# %% -import tensorflow as tf -# %% -from absl import app -from absl import flags -from absl import logging -from ml_collections.config_flags import config_flags -from tensorflow.io import gfile - -import custom_models -import custom_segmentation_eval -# scenic dependencies for debugging -from scenic.train_lib import train_utils - -# import train_utils # local file import - -#%% -config_flags.DEFINE_config_file( - 'config', None, 'Training configuration.', lock_config=True) -flags.DEFINE_string('output_dir', default=None, help='Work unit directory.') -flags.DEFINE_integer( - 'num_cores', default=None, help='Unused. How many devices being used.') -flags.DEFINE_boolean( - 'use_gpu', default=None, help='Unused. Whether or not running on GPU.') -flags.DEFINE_string('tpu', None, - 'Unused. Name of the TPU. 
Only used if use_gpu is False.') -flags.DEFINE_string('dataset_service_address', None, - 'Address of the tf.data service') -FLAGS = flags.FLAGS - - -def write_note(note): - if jax.process_index() == 0: - logging.info('NOTE: %s', note) - - -from clu import metric_writers - - -def run(config, workdir): - """Prepares model, and dataset for training. - - This creates summary directories, summary writers, model definition, and - builds datasets to be sent to the main training script. - - Args: - config: ConfigDict; Hyper parameters. - workdir: string; Root directory for the experiment. - - Returns: - The outputs of trainer.train(), which are train_state, train_summary, and - eval_summary. - """ - lead_host = jax.process_index() == 0 - # set up the train_dir and log_dir - gfile.makedirs(workdir) - #workdir = os.path.join(workdir, 'trial') - #gfile.makedirs(workdir) - - summary_writer = None - if lead_host and config.write_summary: - tensorboard_dir = os.path.join(workdir, 'tb_summaries') - gfile.makedirs(tensorboard_dir) - # summary_writer = tensorboard.SummaryWriter(tensorboard_dir) - summary_writer = metric_writers.SummaryWriter(tensorboard_dir) - - device_count = jax.device_count() - logging.info('device_count: %d', device_count) - logging.info('num_hosts : %d', jax.process_count()) - logging.info('host_id : %d', jax.process_index()) - - rng = jax.random.PRNGKey(config.rng_seed) - logging.info('rng: %s', rng) - - model_cls = custom_models.SegmenterSegmentationModel - - # ---------------------- - # Load dataset - # ---------------------- - data_rng, rng = jax.random.split(rng) - # set resource limit to debug in mac osx (see https://github.com/tensorflow/datasets/issues/1441) - if jax.process_index() == 0 and sys.platform == 'darwin': - import resource - low, high = resource.getrlimit(resource.RLIMIT_NOFILE) - resource.setrlimit(resource.RLIMIT_NOFILE, (high, high)) - write_note('Loading dataset...') - - # TODO: update num_classes - dataset = train_utils.get_dataset( - 
config, data_rng, dataset_service_address=FLAGS.dataset_service_address) - - return rng, model_cls, dataset, config, workdir, summary_writer - - -def main(config, output_dir): - - print('config') - print(config) - seed = config.get('rng_seed', 0) - rng = jax.random.PRNGKey(seed) - tf.random.set_seed(seed) - - print('workdir ', output_dir) - rng, model_cls, dataset, config, workdir, summary_writer = run(config, output_dir) - print('workdir ', workdir) - - # ---------------------- - # Eval function - # ---------------------- - eval_fn = custom_segmentation_eval.eval1 - - train_state, train_summary, eval_summary = eval_fn(rng=rng, model_cls=model_cls, dataset=dataset, - config=config, - workdir=output_dir, writer=summary_writer) - - print(eval_summary) - return - - -if __name__ == '__main__': - # Adds jax flags to the program. - jax.config.config_with_absl() - - # TODO(dusenberrymw): Refactor `main` such that there is a `train_eval` - # function that returns values for tests and does not directly access flags, - # and then have `main` return None. - - def _main(unused_argv): - config = FLAGS.config - output_dir = FLAGS.output_dir - main(config, output_dir) - - app.run(_main) # Ignore the returned values from `main`. \ No newline at end of file diff --git a/experimental/cityscapes/deterministic_eval_gcp.py b/experimental/cityscapes/deterministic_eval_gcp.py deleted file mode 100644 index a968ca1c1..000000000 --- a/experimental/cityscapes/deterministic_eval_gcp.py +++ /dev/null @@ -1,174 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Eval vit model on cityscapes. - -Step 1: aim to train model on cityscapes for 1 step -# Runs with - -""" - -import os -import sys - -# %% -import jax -# %% -import tensorflow as tf -# %% -from absl import app -from absl import flags -from absl import logging -from ml_collections.config_flags import config_flags -from tensorflow.io import gfile - -import custom_models -import custom_segmentation_eval -# scenic dependencies for debugging -from scenic.train_lib import train_utils - -# import train_utils # local file import -import pandas as pd - -#%% -config_flags.DEFINE_config_file( - 'config', None, 'Training configuration.', lock_config=True) -flags.DEFINE_string('output_dir', default=None, help='Work unit directory.') -flags.DEFINE_integer( - 'num_cores', default=None, help='Unused. How many devices being used.') -flags.DEFINE_boolean( - 'use_gpu', default=None, help='Unused. Whether or not running on GPU.') -flags.DEFINE_string('tpu', None, - 'Unused. Name of the TPU. Only used if use_gpu is False.') -flags.DEFINE_string('dataset_service_address', None, - 'Address of the tf.data service') -FLAGS = flags.FLAGS - - -def write_note(note): - if jax.process_index() == 0: - logging.info('NOTE: %s', note) - - -from clu import metric_writers - - -def run(config, workdir): - """Prepares model, and dataset for training. - - This creates summary directories, summary writers, model definition, and - builds datasets to be sent to the main training script. - - Args: - config: ConfigDict; Hyper parameters. - workdir: string; Root directory for the experiment. 
- - Returns: - The outputs of trainer.train(), which are train_state, train_summary, and - eval_summary. - """ - lead_host = jax.process_index() == 0 - # set up the train_dir and log_dir - gfile.makedirs(workdir) - #workdir = os.path.join(workdir, 'trial') - #gfile.makedirs(workdir) - - summary_writer = None - if lead_host and config.write_summary: - tensorboard_dir = os.path.join(workdir, 'tb_summaries') - gfile.makedirs(tensorboard_dir) - # summary_writer = tensorboard.SummaryWriter(tensorboard_dir) - summary_writer = metric_writers.SummaryWriter(tensorboard_dir) - - device_count = jax.device_count() - logging.info('device_count: %d', device_count) - logging.info('num_hosts : %d', jax.process_count()) - logging.info('host_id : %d', jax.process_index()) - - rng = jax.random.PRNGKey(config.rng_seed) - logging.info('rng: %s', rng) - - model_cls = custom_models.SegmenterSegmentationModel - - # ---------------------- - # Load dataset - # ---------------------- - data_rng, rng = jax.random.split(rng) - # set resource limit to debug in mac osx (see https://github.com/tensorflow/datasets/issues/1441) - if jax.process_index() == 0 and sys.platform == 'darwin': - import resource - low, high = resource.getrlimit(resource.RLIMIT_NOFILE) - resource.setrlimit(resource.RLIMIT_NOFILE, (high, high)) - write_note('Loading dataset...') - - # TODO: update num_classes - dataset = train_utils.get_dataset( - config, data_rng, dataset_service_address=FLAGS.dataset_service_address) - - return rng, model_cls, dataset, config, workdir, summary_writer - - -def main(config, output_dir): - - print('config') - print(config) - seed = config.get('rng_seed', 0) - rng = jax.random.PRNGKey(seed) - tf.random.set_seed(seed) - - print('workdir ', output_dir) - rng, model_cls, dataset, config, workdir, summary_writer = run(config, output_dir) - print('workdir ', workdir) - - # ---------------------- - # Eval function - # ---------------------- - eval_fn = custom_segmentation_eval.eval1 - - # models - 
for rng_seed in [0,1,2,3,4]: - for train_mode in ["deterministic","scratch","gp"]: - for train_split in [100,75, 50, 25, 10]: - run_name="{}_split{}_seed{}".format(train_mode, train_split, rng_seed) - tmp_workdir="gs://ub-ekb/segmenter/cityscapes/run_splits1/checkpoints/{}".format(run_name) - print("temp directory", tmp_workdir) - tmp_resultsdir="results/metrics/{}.csv".format(run_name) - #import pdb; pdb.set_trace(); - train_state, train_summary, eval_summary = eval_fn(rng=rng, model_cls=model_cls, dataset=dataset, - config=config, - workdir=tmp_workdir, writer=summary_writer) - print(eval_summary) - #import pdb;pdb.set_trace() - df = pd.DataFrame([eval_summary]) - df.to_csv (r'{}'.format(tmp_resultsdir), index = False, header=True) - - return - - -if __name__ == '__main__': - # Adds jax flags to the program. - jax.config.config_with_absl() - - # TODO(dusenberrymw): Refactor `main` such that there is a `train_eval` - # function that returns values for tests and does not directly access flags, - # and then have `main` return None. - - def _main(unused_argv): - config = FLAGS.config - output_dir = FLAGS.output_dir - main(config, output_dir) - - app.run(_main) # Ignore the returned values from `main`. diff --git a/experimental/cityscapes/deterministic_eval_l32.py b/experimental/cityscapes/deterministic_eval_l32.py deleted file mode 100644 index 09571dd07..000000000 --- a/experimental/cityscapes/deterministic_eval_l32.py +++ /dev/null @@ -1,170 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Eval vit model on cityscapes. -""" - -import os -import sys - -# %% -import jax -# %% -import tensorflow as tf -# %% -from absl import app -from absl import flags -from absl import logging -from ml_collections.config_flags import config_flags -from tensorflow.io import gfile - -import custom_models -import custom_segmentation_eval -# scenic dependencies for debugging -from scenic.train_lib import train_utils - -# import train_utils # local file import -import pandas as pd - -#%% -config_flags.DEFINE_config_file( - 'config', None, 'Training configuration.', lock_config=True) -flags.DEFINE_string('output_dir', default=None, help='Work unit directory.') -flags.DEFINE_integer( - 'num_cores', default=None, help='Unused. How many devices being used.') -flags.DEFINE_boolean( - 'use_gpu', default=None, help='Unused. Whether or not running on GPU.') -flags.DEFINE_string('tpu', None, - 'Unused. Name of the TPU. Only used if use_gpu is False.') -flags.DEFINE_string('dataset_service_address', None, - 'Address of the tf.data service') -FLAGS = flags.FLAGS - - -def write_note(note): - if jax.process_index() == 0: - logging.info('NOTE: %s', note) - - -from clu import metric_writers - - -def run(config, workdir): - """Prepares model, and dataset for training. - - This creates summary directories, summary writers, model definition, and - builds datasets to be sent to the main training script. - - Args: - config: ConfigDict; Hyper parameters. - workdir: string; Root directory for the experiment. - - Returns: - The outputs of trainer.train(), which are train_state, train_summary, and - eval_summary. 
- """ - lead_host = jax.process_index() == 0 - # set up the train_dir and log_dir - gfile.makedirs(workdir) - #workdir = os.path.join(workdir, 'trial') - #gfile.makedirs(workdir) - - summary_writer = None - if lead_host and config.write_summary: - tensorboard_dir = os.path.join(workdir, 'tb_summaries') - gfile.makedirs(tensorboard_dir) - # summary_writer = tensorboard.SummaryWriter(tensorboard_dir) - summary_writer = metric_writers.SummaryWriter(tensorboard_dir) - - device_count = jax.device_count() - logging.info('device_count: %d', device_count) - logging.info('num_hosts : %d', jax.process_count()) - logging.info('host_id : %d', jax.process_index()) - - rng = jax.random.PRNGKey(config.rng_seed) - logging.info('rng: %s', rng) - - model_cls = custom_models.SegmenterSegmentationModel - - # ---------------------- - # Load dataset - # ---------------------- - data_rng, rng = jax.random.split(rng) - # set resource limit to debug in mac osx (see https://github.com/tensorflow/datasets/issues/1441) - if jax.process_index() == 0 and sys.platform == 'darwin': - import resource - low, high = resource.getrlimit(resource.RLIMIT_NOFILE) - resource.setrlimit(resource.RLIMIT_NOFILE, (high, high)) - write_note('Loading dataset...') - - # TODO: update num_classes - dataset = train_utils.get_dataset( - config, data_rng, dataset_service_address=FLAGS.dataset_service_address) - - return rng, model_cls, dataset, config, workdir, summary_writer - - -def main(config, output_dir): - - print('config') - print(config) - seed = config.get('rng_seed', 0) - rng = jax.random.PRNGKey(seed) - tf.random.set_seed(seed) - - print('workdir ', output_dir) - rng, model_cls, dataset, config, workdir, summary_writer = run(config, output_dir) - print('workdir ', workdir) - - # ---------------------- - # Eval function - # ---------------------- - eval_fn = custom_segmentation_eval.eval1 - - # models - for rng_seed in [0]: #,1,2,3,4]: - for train_mode in ["deterministic"]:#,"scratch","gp"]: - for 
train_split in [100]: - run_name="{}_split{}_seed{}".format(train_mode, train_split, rng_seed) - tmp_workdir="gs://ub-ekb/segmenter/cityscapes/run_splits_l32/checkpoints/{}".format(run_name) - print("temp directory", tmp_workdir) - tmp_resultsdir="results_l32/metrics/{}.csv".format(run_name) - #import pdb; pdb.set_trace(); - train_state, train_summary, eval_summary = eval_fn(rng=rng, model_cls=model_cls, dataset=dataset, - config=config, - workdir=tmp_workdir, writer=summary_writer) - print(eval_summary) - #import pdb;pdb.set_trace() - df = pd.DataFrame([eval_summary]) - df.to_csv (r'{}'.format(tmp_resultsdir), index = False, header=True) - - return - - -if __name__ == '__main__': - # Adds jax flags to the program. - jax.config.config_with_absl() - - # TODO(dusenberrymw): Refactor `main` such that there is a `train_eval` - # function that returns values for tests and does not directly access flags, - # and then have `main` return None. - - def _main(unused_argv): - config = FLAGS.config - output_dir = FLAGS.output_dir - main(config, output_dir) - - app.run(_main) # Ignore the returned values from `main`. diff --git a/experimental/cityscapes/ensemble_eval.py b/experimental/cityscapes/ensemble_eval.py deleted file mode 100644 index 2cac1fa2a..000000000 --- a/experimental/cityscapes/ensemble_eval.py +++ /dev/null @@ -1,254 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -""" -Train vit model on cityscapes. - -Evaluate ensemble -""" - -import os -import sys - -# %% -import jax -import flax -import numpy as np -import jax.numpy as jnp -from flax.training import checkpoints - -# %% -import tensorflow as tf -# %% -from absl import app -from absl import flags -from absl import logging -from ml_collections.config_flags import config_flags -from tensorflow.io import gfile - -import custom_models -import custom_segmentation_eval -# scenic dependencies for debugging -from scenic.train_lib import train_utils -from scenic.model_lib.base_models import model_utils -from scenic.model_lib.base_models.segmentation_model import num_pixels - -# import train_utils # local file import - -#%% -config_flags.DEFINE_config_file( - 'config', None, 'Training configuration.', lock_config=True) -flags.DEFINE_string('output_dir', default=None, help='Work unit directory.') -flags.DEFINE_string('checkpoint_dir', default=None, help='Checkpoint directory.') - -flags.DEFINE_integer( - 'num_cores', default=None, help='Unused. How many devices being used.') -flags.DEFINE_boolean( - 'use_gpu', default=None, help='Unused. Whether or not running on GPU.') -flags.DEFINE_string('tpu', None, - 'Unused. Name of the TPU. 
Only used if use_gpu is False.') -flags.DEFINE_string('dataset_service_address', None, - 'Address of the tf.data service') -FLAGS = flags.FLAGS - - -def write_note(note): - if jax.process_index() == 0: - logging.info('NOTE: %s', note) - - -from clu import metric_writers - - - -def parse_checkpoint_dir(checkpoint_dir): - """Parse directory of checkpoints.""" - paths = [] - subdirectories = tf.io.gfile.glob(os.path.join(checkpoint_dir, '*')) - #is_checkpoint = lambda f: ('checkpoint' in f and '.index' in f) - is_checkpoint = lambda f: ('checkpoint' in f) - - for subdir in subdirectories: - for path, _, files in tf.io.gfile.walk(subdir): - if any(f for f in files if is_checkpoint(f)): - latest_checkpoint_without_suffix = flax.training.checkpoints.latest_checkpoint(path) - paths.append(latest_checkpoint_without_suffix) - break - return paths - - -def run(config, workdir): - """Prepares model, and dataset for training. - - This creates summary directories, summary writers, model definition, and - builds datasets to be sent to the main training script. - - Args: - config: ConfigDict; Hyper parameters. - workdir: string; Root directory for the experiment. - - Returns: - The outputs of trainer.train(), which are train_state, train_summary, and - eval_summary. 
- """ - lead_host = jax.process_index() == 0 - # set up the train_dir and log_dir - gfile.makedirs(workdir) - #workdir = os.path.join(workdir, 'trial') - #gfile.makedirs(workdir) - - summary_writer = None - if lead_host and config.write_summary: - tensorboard_dir = os.path.join(workdir, 'tb_summaries') - gfile.makedirs(tensorboard_dir) - # summary_writer = tensorboard.SummaryWriter(tensorboard_dir) - summary_writer = metric_writers.SummaryWriter(tensorboard_dir) - - device_count = jax.device_count() - logging.info('device_count: %d', device_count) - logging.info('num_hosts : %d', jax.process_count()) - logging.info('host_id : %d', jax.process_index()) - - rng = jax.random.PRNGKey(config.rng_seed) - logging.info('rng: %s', rng) - - # ---------------------- - # Call Model - # ---------------------- - - model_cls = custom_models.SegmenterSegmentationModel - - # ---------------------- - # Load dataset - # ---------------------- - data_rng, rng = jax.random.split(rng) - # set resource limit to debug in mac osx (see https://github.com/tensorflow/datasets/issues/1441) - if jax.process_index() == 0 and sys.platform == 'darwin': - import resource - low, high = resource.getrlimit(resource.RLIMIT_NOFILE) - resource.setrlimit(resource.RLIMIT_NOFILE, (high, high)) - - - eval_dataset_name = config.get('eval_dataset_name', 'cityscapes_val') - - write_note('Loading dataset... 
{}'.format(eval_dataset_name)) - - # TODO: update num_classes - if eval_dataset_name == 'cityscapes_val': - dataset = train_utils.get_dataset( - config, data_rng, dataset_service_address=FLAGS.dataset_service_address) - - - return rng, model_cls, dataset, config, workdir, summary_writer - - -def main(config, output_dir,checkpoint_dir): - - print('config') - print(config) - seed = config.get('rng_seed', 0) - rng = jax.random.PRNGKey(seed) - tf.random.set_seed(seed) - - # ---------------------- - # Get dataset - # ---------------------- - - print('workdir ', output_dir) - rng, model_cls, dataset, config, workdir, summary_writer = run(config, output_dir) - - num_eval_examples = dataset.meta_data['num_eval_examples'] - num_eval_steps = int(np.ceil(num_eval_examples / config.batch_size)) - assert config.batch_size == 1 - - # ---------------------- - # Buils Model - # ---------------------- - - # Build dummy input - input_shape = [1] + list(dataset.meta_data['input_shape'][1:]) - #input_shape = dataset.meta_data['input_shape'] - in_st = dataset.meta_data['input_dtype'] - - dummy_input = jnp.zeros(input_shape, in_st.dtype) - - # Init model - rng, init_rng = jax.random.split(rng) - model = model_cls(config, dataset.meta_data) # extracting number of classes in meta_data - flax_model = model.flax_model - init_model_state, init_params = flax_model.init( - init_rng, dummy_input, train=False, debug=False).pop('params') - - - ensemble_filenames = parse_checkpoint_dir(checkpoint_dir) - ensemble_size = len(ensemble_filenames) - - # ---------------------- - # Evaluate models - # ---------------------- - num_eval_steps = 1 - dataset_name='trial' - # dict_keys(['batch_mask', 'inputs', 'label']) - - # ------------------------------- - # Write Model Predictions to file - # ------------------------------- - - # TODO: reset iterator - test_iterator = dataset.valid_iter - #import pdb; pdb.set_trace() - for m, ensemble_filename in enumerate(ensemble_filenames): - #train_state = 
checkpoints.restore_checkpoint(ensemble_filename, init_model_state) - - variables = {'params': init_params, **init_model_state} - - # assume only one test_set - #test_iterator = iter(test_dataset) - for _ in range(num_eval_steps): # num_eval_steps - eval_batch = next(dataset.valid_iter) #dict_keys(['batch_mask', 'inputs', 'label']) - inputs = eval_batch['inputs'][0] - logits, outs = flax_model.apply(variables, inputs, train=False, mutable=False) - - targets = eval_batch['label'][0] - weights = eval_batch['batch_mask'][0] - one_hot_targets = flax.training.common_utils.onehot(targets, dataset.meta_data['num_classes']) - - correct = model_utils.weighted_correctly_classified(logits, one_hot_targets, weights) - - number_pixels = num_pixels(logits,one_hot_targets,weights) - - accuracy = correct.sum()/number_pixels - - loss = model_utils.weighted_softmax_cross_entropy(logits, one_hot_targets, weights) - - - return - - -if __name__ == '__main__': - # Adds jax flags to the program. - jax.config.config_with_absl() - - # TODO(dusenberrymw): Refactor `main` such that there is a `train_eval` - # function that returns values for tests and does not directly access flags, - # and then have `main` return None. - - def _main(unused_argv): - config = FLAGS.config - output_dir = FLAGS.output_dir - checkpoint_dir = FLAGS.checkpoint_dir - main(config, output_dir, checkpoint_dir) - - app.run(_main) # Ignore the returned values from `main`. \ No newline at end of file diff --git a/experimental/cityscapes/experiments/cityscapes_segvit_config.py b/experimental/cityscapes/experiments/cityscapes_segvit_config.py deleted file mode 100644 index 3fd2a2676..000000000 --- a/experimental/cityscapes/experiments/cityscapes_segvit_config.py +++ /dev/null @@ -1,227 +0,0 @@ -# pylint: disable=line-too-long -r"""Default configs for Cityscapes segmentation. 
- -Launch a TPU job: -gxm third_party/py/scenic/google/xm/launch_xm.py -- \ - --binary //third_party/py/scenic/projects/segvit:main \ - --config='third_party/py/scenic/projects/segvit/configs/cityscapes_segvit_config.py' \ - --platform=df_8x8 --xm_resource_alloc=group:brain/grand-vision-xm-df \ - --priority=115 \ - --exp_name=cityscapes_segvit \ - --notes "R50-ViT-B/16 1024x2048 sweep" - -Test run: xid/27318283 -Performance: ~78% mIoU (WID 9 https://flatboard.corp.google.com/plot/hu4ooWrx4t0) - -""" -# pylint: enable=line-too-long - -import ml_collections - -_CITYSCAPES_TRAIN_SIZE = 2975 - -# Model specs. -VIT_SIZE = 'B' -STRIDE = 16 -RESNET_SIZE = 50 -CLASSIFIER = 'token' - -# JFT pretrained models derived from: -# https://colab.corp.google.com/drive/1GNO2D-BhZGX8UARyZCQ8xfhlCea42yx9#scrollTo=UXdJdTS6rfsx -MODEL_PATHS = { - ('B', 32, 50, 'token'): - '/cns/tp-d/home/dune/task_adapt/xzhai/tmp/hybrid/17221856/5/checkpoint.npz', - ('B', 16, 50, 'token'): - '/cns/tp-d/home/dune/task_adapt/xzhai/tmp/hybrid/17221856/6/checkpoint.npz', - ('B', 32, None, 'token'): - '/cns/tp-d/home/brain-ber/adosovitskiy/17084881/1/checkpoint.npz', - ('B', 16, None, 'token'): - '/cns/vz-d/home/brain-ber/adosovitskiy/17402132/1/checkpoint.npz', - ('L', 32, 50, 'token'): - '/cns/tp-d/home/brain-ber/adosovitskiy/17215117/1/checkpoint.npz', - ('L', 16, 50, 'token'): - '/cns/tp-d/home/brain-ber/adosovitskiy/17193867/2/checkpoint.npz', - ('L', 32, None, 'token'): - '/cns/lu-d/home/brain-ber/adosovitskiy/17085772/1/checkpoint.npz', - ('L', 16, None, 'token'): - '/cns/tp-d/home/brain-ber/adosovitskiy/17192124/1/checkpoint.npz', -} - -MODEL_PATH = MODEL_PATHS[(VIT_SIZE, STRIDE, RESNET_SIZE, CLASSIFIER)] - - -def get_config(): - """Returns the configuration for Cityscapes segmentation.""" - config = ml_collections.ConfigDict() - config.experiment_name = 'cityscapes_segvit' - - # dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - 
config.dataset_configs.target_size = (1024, 2048) - - # model - config.model_name = 'segmenter' - config.model = ml_collections.ConfigDict() - - config.model.patches = ml_collections.ConfigDict() - config.model.patches.size = (STRIDE, STRIDE) - - config.model.backbone = ml_collections.ConfigDict() - config.model.backbone.type = 'vit_plus' - config.model.backbone.body = get_backbone_config(config) - - # decoder - config.model.decoder = ml_collections.ConfigDict() - config.model.decoder.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - num_training_epochs = ml_collections.FieldReference(100) - config.num_training_epochs = num_training_epochs - config.batch_size = 128 - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # init - config.init_from = ml_collections.ConfigDict() - config.init_from.codebase = 'bigvision' - config.init_from.checkpoint_path = MODEL_PATH - config.init_from.xm = None - config.init_from.model_prefix_path = ['backbone', 'resformer'] - - # logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = False # debug mode during training - config.debug_eval = False # debug mode during eval - config.log_eval_steps = 200 - return config - - -def get_backbone_config(config): - """Get ViT+-style ViT backbone configs.""" - body_config = ml_collections.ConfigDict() - body_config.type = 'resformer' - body_config.patches = None - if RESNET_SIZE == 50: - body_config.resnet = ml_collections.ConfigDict() - body_config.resnet.depth = (3, 4, 6, 3) - body_config.resnet.width = 1.0 - elif RESNET_SIZE is None: - body_config.patches = {'size': (STRIDE, STRIDE)} - else: - raise NotImplementedError('') - - if RESNET_SIZE and STRIDE == 16: - depth = body_config.resnet.depth - depth = depth[:-2] + (sum(depth[-2:]),) - body_config.resnet.depth = depth - - body_config.transformer = ml_collections.ConfigDict() - body_config.transformer.dropout_rate = 0.1 - - if VIT_SIZE == 'B': - body_config.transformer.mlp_dim = 3072 - body_config.transformer.num_heads = 12 - body_config.transformer.num_layers = 12 - 
body_config.hidden_size = 768 - elif VIT_SIZE == 'L': - body_config.transformer.mlp_dim = 4096 - body_config.transformer.num_heads = 16 - body_config.transformer.num_layers = 24 - body_config.hidden_size = 1024 - else: - raise NotImplementedError('') - - body_config.classifier = CLASSIFIER - body_config.representation_size = None - - body_config.grid_size = ( - config.dataset_configs.target_size[0] // STRIDE, - config.dataset_configs.target_size[1] // STRIDE, - ) - - return body_config - - -def model(hyper, vit_size, stride, resnet_size, classifier): - """Defines models for sweep.""" - overwrites = [] - if resnet_size == 50: - depth = (3, 4, 6, 3) - if stride == 16: - depth = depth[:-2] + (sum(depth[-2:]),) - overwrites.append( - hyper.sweep('config.model.backbone.body.resnet.depth', [depth])) - overwrites.append( - hyper.sweep('config.model.backbone.body.resnet.width', [1.0])) - overwrites.append(hyper.sweep('config.model.backbone.body.patches', [None])) - elif resnet_size is None: - overwrites.append( - hyper.sweep('config.model.backbone.body.patches', [{ - 'size': (stride, stride) - }])) - else: - raise NotImplementedError('') - - if vit_size == 'B': - overwrites.append( - hyper.sweep('config.model.backbone.body.transformer.mlp_dim', [3072])) - overwrites.append( - hyper.sweep('config.model.backbone.body.transformer.num_heads', [12])) - overwrites.append( - hyper.sweep('config.model.backbone.body.transformer.num_layers', [12])) - overwrites.append( - hyper.sweep('config.model.backbone.body.hidden_size', [768])) - elif vit_size == 'L': - overwrites.append( - hyper.sweep('config.model.backbone.body.transformer.mlp_dim', [4096])) - overwrites.append( - hyper.sweep('config.model.backbone.body.transformer.num_heads', [16])) - overwrites.append( - hyper.sweep('config.model.backbone.body.transformer.num_layers', [24])) - overwrites.append( - hyper.sweep('config.model.backbone.body.hidden_size', [1024])) - else: - raise NotImplementedError('') - - overwrites.append( - 
hyper.sweep('config.model.backbone.body.classifier', [classifier])) - overwrites.append( - hyper.sweep('config.init_from.checkpoint_path', - [MODEL_PATHS[(vit_size, stride, resnet_size, classifier)]])) - - return hyper.product(overwrites) - - -def get_hyper(hyper): - """Defines the hyper-parameters sweeps for doing grid search.""" - models = hyper.chainit([ - model(hyper, 'B', 16, RESNET_SIZE, CLASSIFIER), - ]) - - return hyper.product([models]) \ No newline at end of file diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py deleted file mode 100644 index 710c16bef..000000000 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes.py +++ /dev/null @@ -1,153 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -target_size = (128, 128) -LOAD_PRETRAINED_BACKBONE = True -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - #config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. 
- config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. - config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def 
get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py deleted file mode 100644 index fc997ce33..000000000 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes1.py +++ /dev/null @@ -1,177 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. - -""" -# pylint: enable=line-too-long - -import ml_collections -#import get_fewshot # local file import - -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 1 -STRIDE = 4 - -target_size=(128, 128) -train_split='train' -# debug on mac -if DEBUG == 1: - batch_size = 1 - number_train_examples_debug = 29 - number_eval_examples_debug = 29 - num_training_epochs = 1 # ml_collections.FieldReference(100) - log_eval_steps = 1 - - mlp_dim = 2 - num_heads = 1 - num_layers = 1 - hidden_size = 1 -# debug on v3-8: 1 epoch/16 samples/small vit - train_split='train[:1%]' -elif DEBUG == 2: - batch_size=8 - number_train_examples_debug = 16 - number_eval_examples_debug = 16 - num_training_epochs = 1 # ml_collections.FieldReference(100) - log_eval_steps = 1 - - mlp_dim = 2 - num_heads = 1 - num_layers = 1 - hidden_size = 1 -# debug on v3-8: 1 epoch/16 samples/regular vit -elif DEBUG == 3: - batch_size=8 - number_train_examples_debug = 16 - number_eval_examples_debug = 16 - num_training_epochs = 1 # ml_collections.FieldReference(100) - log_eval_steps = 1 - - mlp_dim = 3072 - num_heads = 12 - num_layers = 12 - hidden_size = 768 -elif DEBUG == 4: - target_size =(128, 128) - STRIDE=16 - batch_size=8 - number_train_examples_debug = 16 - number_eval_examples_debug = 16 - num_training_epochs = 1 # ml_collections.FieldReference(100) - log_eval_steps = 1 - - mlp_dim = 3072 - num_heads = 12 - num_layers = 12 - hidden_size = 768 -elif DEBUG == 5: - target_size =(128, 128) - STRIDE=32 - batch_size=1 - number_train_examples_debug = 16 - number_eval_examples_debug = 16 - num_training_epochs = 1 # ml_collections.FieldReference(100) - log_eval_steps = 1 - - mlp_dim = 4096 - num_heads = 16 - num_layers = 24 - hidden_size = 1024 - - - -def get_config(): - """Config for training a patch-transformer on JFT.""" - config = ml_collections.ConfigDict() 
- - config.experiment_name = 'cityscapes_segvit_ub' - - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split=train_split - # flags to debug scenic on mac - config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - config.num_classes = 19 - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.classifier = 'gap' - - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - config.backbone_configs.hidden_size = hidden_size - - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - #steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size - steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.steps_per_epoch = steps_per_epoch - config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = log_eval_steps #200 - - # extra - config.args = {} - - return config - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py deleted file mode 100644 index 7ce7eb2d1..000000000 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes3.py +++ /dev/null @@ -1,137 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. - -include wandb parameters -""" -# pylint: enable=line-too-long - -import ml_collections -import os.path -import datetime - -#import get_fewshot # local file import - -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 1 -STRIDE = 4 - -target_size=(128, 128) - -batch_size = 1 -number_train_examples_debug = 29 -number_eval_examples_debug = 29 -num_training_epochs = ml_collections.FieldReference(1) - -mlp_dim = 2 -num_heads = 1 -num_layers = 1 -hidden_size = 1 -train_split = 'train[:1%]' - - -def get_config(): - """Config for training a patch-transformer on JFT.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub' - - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - # flags to debug scenic - config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - config.num_classes = 19 - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. 
- config.backbone_configs.classifier = 'gap' - - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - config.backbone_configs.hidden_size = hidden_size - - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. - config.steps_per_epoch = config.dataset_configs.get_ref('number_train_examples_debug') // config.get_ref('batch_size') - config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' - #config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') - config.lr_configs.steps_per_cycle = config.get_ref('num_training_epochs') * config.get_ref('steps_per_epoch') - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - config.upstream_model = 'scratch' - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * 
config.get_ref('steps_per_epoch') - - # wabdb - config.use_wandb =True - config.wandb_dir = 'wandb' - config.wandb_project = 'rdl-visual' # Wandb project name. - config.wandb_entity = 'ub_rdl_big_paper' - #config.wandb_exp_name = None # Give experiment a name. - config.wandb_exp_name = ( - os.path.splitext(os.path.basename(__file__))[0] + '_' + - datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) - config.wandb_exp_group = None # Give experiment a group name. - - config.early_stopping_patience = 3 # number of epochs to wait before stopping training - return config - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes512.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes512.py deleted file mode 100644 index 57a65b1eb..000000000 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes512.py +++ /dev/null @@ -1,153 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = True -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - #config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. 
- config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. - config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def 
get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp.py deleted file mode 100644 index b0c7cd649..000000000 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp.py +++ /dev/null @@ -1,153 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. - -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -target_size = (128, 128) -LOAD_PRETRAINED_BACKBONE = True -PRETRAIN_BACKBONE_TYPE = 'gp' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - #config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. 
- config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. - config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def 
get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp512.py b/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp512.py deleted file mode 100644 index 53de0e270..000000000 --- a/experimental/cityscapes/experiments/imagenet21k_segmenter_cityscapes_sngp512.py +++ /dev/null @@ -1,153 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. - -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = True -PRETRAIN_BACKBONE_TYPE = 'gp' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - #config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. 
- config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. - config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def 
get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes.py b/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes.py deleted file mode 100644 index f5e7c2c20..000000000 --- a/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes.py +++ /dev/null @@ -1,153 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. - -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = True -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=16 -batch_size=32 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - #config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. 
- config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. - config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def 
get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes_sngp.py b/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes_sngp.py deleted file mode 100644 index 0eb9434d1..000000000 --- a/experimental/cityscapes/experiments/pod/imagenet21k_segmenter_cityscapes_sngp.py +++ /dev/null @@ -1,153 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. - -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = True -PRETRAIN_BACKBONE_TYPE = 'gp' - -STRIDE=16 -batch_size=32 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - #config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. 
- config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. - config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def 
get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/pod/segmenter_cityscapes.py b/experimental/cityscapes/experiments/pod/segmenter_cityscapes.py deleted file mode 100644 index 6fa9d4c49..000000000 --- a/experimental/cityscapes/experiments/pod/segmenter_cityscapes.py +++ /dev/null @@ -1,153 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. - -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = False -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=16 -batch_size=32 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - #config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. 
- config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. - config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def 
get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/segmenter_cityscapes.py b/experimental/cityscapes/experiments/segmenter_cityscapes.py deleted file mode 100644 index 66387976d..000000000 --- a/experimental/cityscapes/experiments/segmenter_cityscapes.py +++ /dev/null @@ -1,153 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. - -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -target_size = (128, 128) -LOAD_PRETRAINED_BACKBONE = False -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - #config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. 
- config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. - config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def 
get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/segmenter_cityscapes512.py b/experimental/cityscapes/experiments/segmenter_cityscapes512.py deleted file mode 100644 index 3dc9a7c99..000000000 --- a/experimental/cityscapes/experiments/segmenter_cityscapes512.py +++ /dev/null @@ -1,153 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. - -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = False -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - #config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. 
- config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. - config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def 
get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py deleted file mode 100644 index 8dfe8fb56..000000000 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py +++ /dev/null @@ -1,176 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. - -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=10 - -# we will have 4 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = True -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - if DEBUG == 5: - config.dataset_configs.number_train_examples_debug = number_train_examples_debug - config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = 
ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - #config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - if DEBUG == 5: - steps_per_epoch = number_train_examples_debug // config.batch_size - else: - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_100.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_100.py deleted file mode 100644 index aefdd5acc..000000000 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_100.py +++ /dev/null @@ -1,173 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=100 - -# we will have 5 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = True -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - 
#config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_25.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_25.py deleted file mode 100644 index 2dc1acc70..000000000 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_25.py +++ /dev/null @@ -1,173 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=25 - -# we will have 5 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = True -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - 
#config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_50.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_50.py deleted file mode 100644 index 32bcbad82..000000000 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_50.py +++ /dev/null @@ -1,173 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=50 - -# we will have 5 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = True -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - 
#config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_75.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_75.py deleted file mode 100644 index 4a9524030..000000000 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_75.py +++ /dev/null @@ -1,173 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=75 - -# we will have 5 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = True -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - 
#config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_10.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_10.py deleted file mode 100644 index eadb161b7..000000000 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_10.py +++ /dev/null @@ -1,173 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=10 - -# we will have 5 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = True -PRETRAIN_BACKBONE_TYPE = 'gp' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - 
#config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_100.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_100.py deleted file mode 100644 index 6d3543d5c..000000000 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_100.py +++ /dev/null @@ -1,173 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=100 - -# we will have 5 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = True -PRETRAIN_BACKBONE_TYPE = 'gp' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - 
#config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_25.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_25.py deleted file mode 100644 index 541a1e070..000000000 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_25.py +++ /dev/null @@ -1,173 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=25 - -# we will have 5 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = True -PRETRAIN_BACKBONE_TYPE = 'gp' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - 
#config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_50.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_50.py deleted file mode 100644 index ab0e2f50a..000000000 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_50.py +++ /dev/null @@ -1,173 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=50 - -# we will have 5 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = True -PRETRAIN_BACKBONE_TYPE = 'gp' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - 
#config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_75.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_75.py deleted file mode 100644 index da1adb5da..000000000 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_gp_75.py +++ /dev/null @@ -1,173 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=75 - -# we will have 5 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = True -PRETRAIN_BACKBONE_TYPE = 'gp' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - 
#config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_10.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_10.py deleted file mode 100644 index bae6af57e..000000000 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_10.py +++ /dev/null @@ -1,173 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=10 - -# we will have 5 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = False -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - 
#config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_100.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_100.py deleted file mode 100644 index 615a5312c..000000000 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_100.py +++ /dev/null @@ -1,173 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=100 - -# we will have 5 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = False -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - 
#config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_25.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_25.py deleted file mode 100644 index 8e301c7f7..000000000 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_25.py +++ /dev/null @@ -1,173 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=25 - -# we will have 5 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = False -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - 
#config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_50.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_50.py deleted file mode 100644 index 9c65fbaa0..000000000 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_50.py +++ /dev/null @@ -1,173 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=50 - -# we will have 5 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = False -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - 
#config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_75.py b/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_75.py deleted file mode 100644 index ea4030188..000000000 --- a/experimental/cityscapes/experiments/splits/imagenet21k_segmenter_cityscapes_scratch_75.py +++ /dev/null @@ -1,173 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -TRAIN_PROP=75 - -# we will have 5 version of train split -if TRAIN_PROP == 100: - _CITYSCAPES_TRAIN_SIZE_SPLIT = _CITYSCAPES_TRAIN_SIZE - train_split = 'train' -elif TRAIN_PROP == 75: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 2231 - train_split = 'train[:75%]' -elif TRAIN_PROP == 50: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 1488 - train_split = 'train[:50%]' -elif TRAIN_PROP == 25: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 744 - train_split = 'train[:25%]' -elif TRAIN_PROP == 10: - _CITYSCAPES_TRAIN_SIZE_SPLIT = 298 - train_split = 'train[:10%]' - -target_size = (512, 512) -LOAD_PRETRAINED_BACKBONE = False -PRETRAIN_BACKBONE_TYPE = 'base' - -STRIDE=16 -batch_size=8 -num_training_epochs = 100 # ml_collections.FieldReference(100) -log_eval_steps = 200 - -mlp_dim = 3072 -num_heads = 12 -num_layers = 12 -hidden_size = 768 - -if DEBUG ==5: - number_train_examples_debug = 16 - -def get_config(): - """Config for cityscapes segmentation.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub_init' - - #dataset - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - - # flags to debug scenic on mac - #config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - # model - config.model_name = 'segmenter_pretrained_mini' - config.model = ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.classifier = 'gap' - #config.backbone_configs.grid_size - config.backbone_configs.hidden_size = hidden_size - 
#config.backbone_configs.patches - #config.backbone_configs.representation_size = None - - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - - #decoder - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.batch_size - #steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * steps_per_epoch #log_eval_steps # 200 - - # extra - config.args = {} - - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = PRETRAIN_BACKBONE_TYPE - - if PRETRAIN_BACKBONE_TYPE == 'base': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16/ImagetNet21k_ViT-B:16_28592399.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16.py" - elif PRETRAIN_BACKBONE_TYPE == 'gp': - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-B16-GP/ImageNet21k_ViT-B:16-GP_29240948.npz" - pretrained_backbone_configs.checkpoint_cfg = 
"https://github.com/google/uncertainty-baselines/blob/main/baselines/jft/experiments/imagenet21k_vit_base16_sngp.py" - else: - raise NotImplementedError("") - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.py b/experimental/cityscapes/experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.py deleted file mode 100644 index e1bf272f6..000000000 --- a/experimental/cityscapes/experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.py +++ /dev/null @@ -1,166 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -include wandb parameters -""" -# pylint: enable=line-too-long - -import os -import datetime -import ml_collections -#import get_fewshot # local file import - -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 0 - -STRIDE = 32 -target_size=(512, 512) - -batch_size = 8 -number_train_examples_debug = _CITYSCAPES_TRAIN_SIZE -num_training_epochs = ml_collections.FieldReference(100) - -mlp_dim = 4096 -num_heads = 16 -num_layers = 24 -hidden_size = 1024 -train_split = 'train' - -LOAD_PRETRAINED_BACKBONE=True - -if DEBUG ==1: - STRIDE = 4 - target_size = (128, 128) - - batch_size = 1 - number_train_examples_debug = 29 - num_training_epochs = ml_collections.FieldReference(1) - - mlp_dim = 2 - num_heads = 1 - num_layers = 1 - hidden_size = 1 - train_split = 'train[:1%]' - - -def get_config(): - """Config for training a patch-transformer on JFT.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub' - - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - config.dataset_configs.train_split = train_split - # flags to debug scenic - config.dataset_configs.number_train_examples_debug = number_train_examples_debug - #config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - config.num_classes = 19 - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. 
- config.backbone_configs.classifier = 'gap' - - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - config.backbone_configs.hidden_size = hidden_size - - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.trainer_name = 'segvit_trainer' - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. - config.steps_per_epoch = config.dataset_configs.get_ref('number_train_examples_debug') // config.get_ref('batch_size') - config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') - config.lr_configs.steps_per_cycle = config.get_ref('num_training_epochs') * config.get_ref('steps_per_epoch') - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - # load pretrained backbone - config.upstream_model = 'deterministic' - config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE - config.pretrained_backbone_configs = get_pretrained_backbone_config(config) - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') - - config.debug_train = True # debug mode during 
training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') - - # wabdb - config.use_wandb=True - config.wandb_dir= 'wandb' - config.wandb_project = 'rdl-visual' # Wandb project name. - config.wandb_entity = 'ub_rdl_big_paper' - config.wandb_exp_name = None # Give experiment a name. - config.wandb_exp_name = ( - os.path.splitext(os.path.basename(__file__))[0] + '_' + - datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) - config.wandb_exp_group = None # Give experiment a group name. - - config.early_stopping_patience = 3 # number of epochs to wait before stopping training - return config - - -def get_pretrained_backbone_config(config): - if not config.load_pretrained_backbone: - return None - pretrained_backbone_configs = ml_collections.ConfigDict() - pretrained_backbone_configs.checkpoint_format = "ub" - pretrained_backbone_configs.type = 'base' - - pretrained_backbone_configs.checkpoint_path = "gs://ub-checkpoints/ImageNet21k_ViT-L32/1/checkpoint.npz" - pretrained_backbone_configs.checkpoint_cfg = "https://github.com/google/uncertainty-baselines/blob/4097549f62ca5e209c6f1ca244fe178b53b6cff4/baselines/jft/experiments/jft300m_vit_l32_finetune.py" - - return pretrained_backbone_configs - - -def get_sweep(hyper): - return hyper.product([]) diff --git a/experimental/cityscapes/experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.yaml b/experimental/cityscapes/experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.yaml deleted file mode 100644 index f9e3d353a..000000000 --- a/experimental/cityscapes/experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.yaml +++ /dev/null @@ -1,43 +0,0 @@ -name: vit-imagenet21k_segmenter_deterministic -program: deterministic.py -method: grid -project: rdl-debug -entity: ekellbuch - -metric: - name: valid_loss - goal: minimize -parameters: - config.wandb_project : - value: ${{project}} - config.wandb_entity : - value: 
${{entity}} - config.rng_seed: - values: [0, 1, 2, 3] - config.dataset_configs.train_split: - value: train - config.dataset_configs.number_train_examples_debug: - value: 2975 - config.lr_configs.base_learning_rate: - values: [0.0001, 0.003, 0.01, 0.001] - config.batch_size: - value: 8 - config.num_training_epochs: - values: [50, 100, 30, 75] - - - - -command: - - ${env} - - python - - ${program} - - "--config" - - "experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.py" - - "--output_dir" - - "gs://ub-ekb/segmenter/cityscapes/run_vitl32_split100/checkpoints" - - "--num_cores" - - "8" - - "--tpu" - - "local" - - ${args} \ No newline at end of file diff --git a/experimental/cityscapes/experiments/toy/segmenter_cityscapes.py b/experimental/cityscapes/experiments/toy/segmenter_cityscapes.py deleted file mode 100644 index badb16ebf..000000000 --- a/experimental/cityscapes/experiments/toy/segmenter_cityscapes.py +++ /dev/null @@ -1,161 +0,0 @@ -# coding=utf-8 -# Copyright 2021 The Uncertainty Baselines Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# pylint: disable=line-too-long -r"""Segmenter + cityscapes. 
- -""" -# pylint: enable=line-too-long - -import ml_collections -#import get_fewshot # local file import - -_CITYSCAPES_TRAIN_SIZE = 2975 -DEBUG = 1 -STRIDE = 4 - -target_size=(512, 512) -# debug on mac -if DEBUG == 1: - batch_size = 1 - number_train_examples_debug = 10 - number_eval_examples_debug = 10 - num_training_epochs = 1 # ml_collections.FieldReference(100) - log_eval_steps = 1 - - mlp_dim = 2 - num_heads = 1 - num_layers = 1 - hidden_size = 1 -# debug on v3-8: 1 epoch/16 samples/small vit -elif DEBUG == 2: - batch_size=8 - number_train_examples_debug = 16 - number_eval_examples_debug = 16 - num_training_epochs = 1 # ml_collections.FieldReference(100) - log_eval_steps = 1 - - mlp_dim = 2 - num_heads = 1 - num_layers = 1 - hidden_size = 1 -# debug on v3-8: 1 epoch/16 samples/regular vit -elif DEBUG == 3: - batch_size=8 - number_train_examples_debug = 16 - number_eval_examples_debug = 16 - num_training_epochs = 1 # ml_collections.FieldReference(100) - log_eval_steps = 1 - - mlp_dim = 3072 - num_heads = 12 - num_layers = 12 - hidden_size = 768 -elif DEBUG == 4: - target_size =(128, 128) - STRIDE=16 - batch_size=8 - number_train_examples_debug = 16 - number_eval_examples_debug = 16 - num_training_epochs = 1 # ml_collections.FieldReference(100) - log_eval_steps = 1 - - mlp_dim = 3072 - num_heads = 12 - num_layers = 12 - hidden_size = 768 - - -def get_config(): - """Config for training a patch-transformer on JFT.""" - config = ml_collections.ConfigDict() - - config.experiment_name = 'cityscapes_segvit_ub' - - config.dataset_name = 'cityscapes' - config.dataset_configs = ml_collections.ConfigDict() - config.dataset_configs.target_size = target_size - # flags to debug scenic on mac - config.dataset_configs.number_train_examples_debug = number_train_examples_debug - config.dataset_configs.number_eval_examples_debug = number_train_examples_debug - - # config following scenic - config.num_classes = 19 - config.model_name = 'segmenter_mini' - config.model = 
ml_collections.ConfigDict() - - config.patches = ml_collections.ConfigDict() - config.patches.size = (STRIDE, STRIDE) - - config.backbone_configs = ml_collections.ConfigDict() - config.backbone_configs.type = 'vit' - config.backbone_configs.attention_dropout_rate = 0. - config.backbone_configs.dropout_rate = 0. - config.backbone_configs.classifier = 'gap' - - config.backbone_configs.mlp_dim = mlp_dim - config.backbone_configs.num_heads = num_heads - config.backbone_configs.num_layers = num_layers - config.backbone_configs.hidden_size = hidden_size - - config.decoder_configs = ml_collections.ConfigDict() - config.decoder_configs.type = 'linear' - - # training - config.optimizer = 'adam' - config.optimizer_configs = ml_collections.ConfigDict() - config.l2_decay_factor = 0.0 - config.max_grad_norm = 1.0 - config.label_smoothing = None - config.num_training_epochs = num_training_epochs - config.batch_size = batch_size - config.rng_seed = 0 - config.focal_loss_gamma = 0.0 - - # learning rate - #steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.batch_size - steps_per_epoch = number_train_examples_debug // config.batch_size - - # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
- config.lr_configs = ml_collections.ConfigDict() - config.lr_configs.learning_rate_schedule = 'compound' - config.lr_configs.factors = 'constant' # * cosine_decay * linear_warmup' - config.lr_configs.warmup_steps = 1 * steps_per_epoch - config.lr_configs.steps_per_cycle = num_training_epochs * steps_per_epoch - config.lr_configs.base_learning_rate = 1e-4 - - # model and data dtype - config.model_dtype_str = 'float32' - config.data_dtype_str = 'float32' - - #logging - config.write_summary = True # write TB and/or XM summary - config.write_xm_measurements = True # write XM measurements - #config.xprof = False # Profile using xprof - config.checkpoint = True # do checkpointing - config.checkpoint_steps = 5 * steps_per_epoch - - config.debug_train = True # debug mode during training - config.debug_eval = True # debug mode during eval - config.log_eval_steps = log_eval_steps #200 - - # extra - config.args = {} - - return config - - -def get_sweep(hyper): - return hyper.product([]) \ No newline at end of file diff --git a/experimental/cityscapes/pretrainer_utils.py b/experimental/cityscapes/pretrainer_utils.py deleted file mode 100644 index 0caed0d32..000000000 --- a/experimental/cityscapes/pretrainer_utils.py +++ /dev/null @@ -1,46 +0,0 @@ -""" -Include utils for pretraining - -""" -import importlib -import os -import ml_collections - -def load_relative_config(relative_fpath): - """ Reads config of model in ub. - - Args: - relative_fpath: path of config file relative to its location in ub. 
- - """ - # loader = importlib.machinery.SourceFileLoader('get_config', os.path.abspath(relative_fpath)) - # config = loader.load_module() - # config_module_spec = importlib.util.spec_from_file_location('get_config', os.path.abspath("../../baselines/jft/experiments/imagenet21k_vit_base16.py")) - # config_module = importlib.util.module_from_spec(config_module_spec) - # config_module_spec.loader.exec_module(config_module) - # return config - raise NotImplementedError("") - - -def load_bb_config(config): - """ Temporary toy bb config. - - Args: - config: model config. - - Returns: - restored_model_cfg: mock model config - """ - #TODO(kellybuchanan): we can read the grid size (pose-embedding) given config file. - # Current approach estimates it from the aata directly, see init_backbone_from_train_state - - restored_model_cfg = ml_collections.ConfigDict() - #restored_model_cfg.patches = ml_collections.ConfigDict() - #restored_model_cfg.patches.size = [16, 16] - restored_model_cfg.classifier = 'token' - # if config.pretrained_backbone_configs.type == 'base': - # restored_model_cfg.model.transformer.dropout_rate = 0.1 - - #restored_model_cfg.patches.grid = [224//16, 224//16] - - return restored_model_cfg diff --git a/experimental/cityscapes/run_deterministic_eval.sh b/experimental/cityscapes/run_deterministic_eval.sh deleted file mode 100755 index c973ebd93..000000000 --- a/experimental/cityscapes/run_deterministic_eval.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash - -# train cityscapes using segmenter with pretrained backbone -# supports 2 options to - -function get_config() -{ - local config_file_name="experiments/splits/imagenet21k_segmenter_cityscapes_$1_$2.py" - echo "$config_file_name" -} - -if [ "$(uname)" = "Darwin" ] ; then - # Do something under Mac OS X platform - config_file='experiments/imagenet21k_segmenter_cityscapes1.py' - output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs" - num_cores=0 - tpu=False - 
use_gpu=False - python deterministic_eval.py --output_dir=${output_dir} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - --config=${config_file} \ - # --tpu=$tpu -elif [ "$(uname)" = "Linux" ]; then - echo "in Linux" - train_mode="scratch" - train_split=100 - rng_seed=0 - config_file=$(get_config $train_mode $train_split) - run_name="${train_mode}_split${train_split}_seed${rng_seed}" - #config_file='experiments/imagenet21k_segmenter_cityscapes13.py' - #output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs13" - output_dir="gs://ub-ekb/segmenter/cityscapes/run_splits1/checkpoints/${run_name}" - num_cores=8 - tpu='local' - use_gpu=False - python3 deterministic_eval_gcp.py --output_dir=${output_dir} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - --config=${config_file} \ - --tpu=$tpu -# --config.batch_size=${batch_size} \ - -fi - -#%% diff --git a/experimental/cityscapes/run_deterministic_eval_l32.sh b/experimental/cityscapes/run_deterministic_eval_l32.sh deleted file mode 100755 index c1ff7e531..000000000 --- a/experimental/cityscapes/run_deterministic_eval_l32.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash - -# train cityscapes using segmenter with pretrained backbone -# supports 2 options to - -function get_config() -{ - local config_file_name="experiments/splits_l32/imagenet21k_segmenter_cityscapes_$1_$2.py" - echo "$config_file_name" -} - -if [ "$(uname)" = "Darwin" ] ; then - # Do something under Mac OS X platform - config_file='experiments/imagenet21k_segmenter_cityscapes1.py' - output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs" - num_cores=0 - tpu=False - use_gpu=False - python deterministic_eval.py --output_dir=${output_dir} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - --config=${config_file} \ - # --tpu=$tpu -elif [ "$(uname)" = "Linux" ]; then - echo "in Linux" - train_mode="deterministic" - train_split=100 - rng_seed=0 - config_file=$(get_config $train_mode $train_split) - 
run_name="${train_mode}_split${train_split}_seed${rng_seed}" - #config_file='experiments/imagenet21k_segmenter_cityscapes13.py' - #output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs13" - output_dir="gs://ub-ekb/segmenter/cityscapes/run_splits_l32/checkpoints/${run_name}" - num_cores=8 - tpu='local' - use_gpu=False - python3 deterministic_eval_l32.py --output_dir=${output_dir} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - --config=${config_file} \ - --tpu=$tpu -# --config.batch_size=${batch_size} \ - -fi - -#%% diff --git a/experimental/cityscapes/run_deterministic_eval_splits_vm.sh b/experimental/cityscapes/run_deterministic_eval_splits_vm.sh deleted file mode 100755 index 25a565d2e..000000000 --- a/experimental/cityscapes/run_deterministic_eval_splits_vm.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash - -# train segmenter model on cityscapes using different pretrained backbones for different splits - -function get_config() -{ - local config_file_name="experiments/splits/imagenet21k_segmenter_cityscapes_$1_$2.py" - echo "$config_file_name" -} - -num_cores=8 -tpu='local' -use_gpu=False - - -for rng_seed in 0 1 2 3 -do -for train_mode in "deterministic" "gp" "scratch" -do -for train_split in 100 75 50 25 10 -do -config_file=$(get_config $train_mode $train_split) -run_name="${train_mode}_split${train_split}_seed${rng_seed}" -output_dir_ckpt="gs://ub-ekb/segmenter/cityscapes/run_splits1/checkpoints/${run_name}" -echo "Running experiment ${output_dir_ckpt}" -python3 deterministic_eval.py --output_dir=${output_dir_ckpt} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - --config=${config_file} \ - --tpu=$tpu - #--config.rng_seed=${rng_seed} \ - #--tpu=$tpu - -output_dir_logit="${output_dir_ckpt}/logits" -echo "copy logits to bucket" -gsutil cp -r "logits/val.h5py" "${output_dir_logit}/val.h5py" -echo "remove logits_file" -rm "logits/val.h5py" - -done -done -done diff --git a/experimental/cityscapes/run_deterministic_splits_vm.sh 
b/experimental/cityscapes/run_deterministic_splits_vm.sh deleted file mode 100755 index fefeb6ba7..000000000 --- a/experimental/cityscapes/run_deterministic_splits_vm.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash - -# train segmenter model on cityscapes using different pretrained backbones for different splits - -function get_config() -{ - local config_file_name="experiments/splits/imagenet21k_segmenter_cityscapes_$1_$2.py" - echo "$config_file_name" -} - -num_cores=8 -tpu='local' -use_gpu=False - - -for rng_seed in 4 -do -for train_mode in "deterministic" "gp" "scratch" -do -for train_split in 100 75 50 25 10 -do -config_file=$(get_config $train_mode $train_split) -run_name="${train_mode}_split${train_split}_seed${rng_seed}" -output_dir_ckpt="gs://ub-ekb/segmenter/cityscapes/run_splits1/checkpoints/${run_name}" -echo "Running experiment ${output_dir_ckpt}" -python3 deterministic.py --output_dir=${output_dir_ckpt} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - --config=${config_file} \ - --config.rng_seed=${rng_seed} \ - --tpu=$tpu -done -done -done diff --git a/experimental/cityscapes/run_ensemble.sh b/experimental/cityscapes/run_ensemble.sh deleted file mode 100755 index 931e4f5b7..000000000 --- a/experimental/cityscapes/run_ensemble.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash - -# train segmenter model on cityscapes using different pretrained backbones for different splits - -function get_config() -{ - local config_file_name="experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_$1.py" - echo "$config_file_name" -} - -function get_pretrained_backbone_path() -{ - local checkpoint_path="gs://ub-checkpoints/ImageNet21k_ViT-L32/$1/checkpoint.npz" - echo "$checkpoint_path" -} - -#base_output_dir="outputs/ensemble" -base_output_dir="gs://ub-ekb/segmenter/cityscapes/run_splits_vitl32/checkpoints" - -declare CITYSCAPES_TRAIN_SIZE=( - ["1"]="29" - ["10"]="298" - ["25"]="744" - ["50"]="1488" - ["75"]="2231" - ["100"]="2975" - ) - -# Debug on Mac OS X 
platform -use_gpu=False -if [ "$(uname)" = "Darwin" ] ; then -tpu=False -num_cores=0 -batch_size=1 -elif [ "$(uname)" = "Linux" ]; then -tpu='local' -num_cores=8 -batch_size=8 -fi -for num_training_epochs in 50 #30 50 150 -do -for lr in "0.0001" # "0.03" "0.01" "0.003" "0.001" -do -for rng_seed in 0 1 2 -do -for model_type in "deterministic" -do -for split in 100 -do - config_file=$(get_config $model_type) - learning_rate=$( echo "$lr" | bc ) - run_name="${model_type}_split${split}_seed${rng_seed}_lr${learning_rate}_step${num_training_epochs}" - output_dir="${base_output_dir}/${run_name}" - train_split="train[:${split}%]" - num_train_examples=${CITYSCAPES_TRAIN_SIZE[$split]} - python deterministic.py \ - --output_dir=${output_dir} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - --config=${config_file} \ - --config.rng_seed=${rng_seed} \ - --config.dataset_configs.train_split=${train_split} \ - --config.dataset_configs.number_train_examples_debug=${num_train_examples} \ - --config.batch_size=${batch_size} \ - --tpu=${tpu} \ - --config.lr_configs.base_learning_rate=${learning_rate} \ - --config.num_training_epochs=${num_training_epochs} \ - #--config.upstream_model=${model_type} \ - # --config.pretrained_backbone_configs.checkpoint_path=${pretrained_backbone} \ - -done -done -done -done -done \ No newline at end of file diff --git a/experimental/cityscapes/run_ensemble2.sh b/experimental/cityscapes/run_ensemble2.sh deleted file mode 100755 index e4232ee1d..000000000 --- a/experimental/cityscapes/run_ensemble2.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/bin/bash - -# train segmenter model on cityscapes using different pretrained backbones for different splits - -function get_config() -{ - local config_file_name="experiments/sweep_vit32/imagenet21k_segmenter_cityscapes_deterministic.py" - echo "$config_file_name" -} - -function get_pretrained_backbone_path() -{ - local checkpoint_path="gs://ub-checkpoints/ImageNet21k_ViT-L32/$1/checkpoint.npz" - echo "$checkpoint_path" 
-} - -#base_output_dir="outputs/ensemble" -base_output_dir="gs://ub-ekb/segmenter/cityscapes/run_splits_vitl32/checkpoints" - -declare CITYSCAPES_TRAIN_SIZE=( - ["1"]="29" - ["10"]="298" - ["25"]="744" - ["50"]="1488" - ["75"]="2231" - ["100"]="2975" - ) - -# Debug on Mac OS X platform -use_gpu=False -if [ "$(uname)" = "Darwin" ] ; then -tpu=False -num_cores=0 -batch_size=1 -elif [ "$(uname)" = "Linux" ]; then -tpu='local' -num_cores=8 -batch_size=8 -fi -for num_training_epochs in 50 #30 50 150 -do -for lr in "0.0001" # "0.03" "0.01" "0.003" "0.001" -do -for rng_seed in 0 1 2 -do -for model_type in "deterministic" -do -for split in 100 -do - config_file=$(get_config $model_type) - learning_rate=$( echo "$lr" | bc ) - run_name="${model_type}_split${split}_seed${rng_seed}_lr${learning_rate}_step${num_training_epochs}" - output_dir="${base_output_dir}/${run_name}" - train_split="train[:${split}%]" - num_train_examples=${CITYSCAPES_TRAIN_SIZE[$split]} - pretrained_backbone=$(get_pretrained_backbone_path $rng_seed) - python deterministic.py \ - --output_dir=${output_dir} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - --config=${config_file} \ - --config.rng_seed=${rng_seed} \ - --config.dataset_configs.train_split=${train_split} \ - --config.dataset_configs.number_train_examples_debug=${num_train_examples} \ - --config.batch_size=${batch_size} \ - --tpu=${tpu} \ - --config.lr_configs.base_learning_rate=${learning_rate} \ - --config.num_training_epochs=${num_training_epochs} \ - --config.upstream_model=${model_type} \ - --config.pretrained_backbone_configs.checkpoint_path=${pretrained_backbone} \ - -done -done -done -done -done \ No newline at end of file diff --git a/experimental/cityscapes/run_ensemble_debug.sh b/experimental/cityscapes/run_ensemble_debug.sh deleted file mode 100755 index 96b90943f..000000000 --- a/experimental/cityscapes/run_ensemble_debug.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/bash - -# debug for run_ensemble -# train segmenter model on 
cityscapes using different pretrained backbones for different splits - -# debug for model -DEBUG=1 - - -function get_config() -{ - #local config_file_name="experiments/splits_l32/imagenet21k_segmenter_cityscapes_$1.py" - local config_file_name="experiments/imagenet21k_segmenter_cityscapes3.py" - - echo "$config_file_name" -} - -function get_pretrained_backbone_path() -{ - local checkpoint_path="gs://ub-checkpoints/ImageNet21k_ViT-L32/$1/checkpoint.npz" - echo "$checkpoint_path" -} - -# base_output_dir="outputs/ensemble" - -#base_output_dir="gs://ub-ekb/segmenter/cityscapes/run_splits_vitl32/checkpoints" -base_output_dir='gs://ub-ekb/segmenter/cityscapes/run_splits_debug/checkpoints' - -declare CITYSCAPES_TRAIN_SIZE=( - ["1"]="29" - ["10"]="298" - ["25"]="744" - ["50"]="1488" - ["75"]="2231" - ["100"]="2975" - ) - -# Debug on Mac OS X platform -use_gpu=False -if [ "$(uname)" = "Darwin" ] ; then -tpu=False -num_cores=0 -batch_size=1 -elif [ "$(uname)" = "Linux" ]; then -tpu='local' -num_cores=8 -batch_size=8 -fi -for num_training_epochs in 5 #30 50 150 -do -for lr in "0.0001" # "0.03" "0.01" "0.003" "0.001" -do -for rng_seed in 1 -do -for model_type in "deterministic" -do -for split in 1 -do - config_file=$(get_config $model_type) - learning_rate=$( echo "$lr" | bc ) - run_name="${model_type}_split${split}_seed${rng_seed}_lr${learning_rate}_step${num_training_epochs}" - output_dir="${base_output_dir}/${run_name}" - train_split="train[:${split}%]" - num_train_examples=${CITYSCAPES_TRAIN_SIZE[$split]} - pretrained_backbone=$(get_pretrained_backbone_path $rng_seed) - python deterministic.py \ - --output_dir=${output_dir} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - --config=${config_file} \ - --config.rng_seed=${rng_seed} \ - --config.dataset_configs.train_split=${train_split} \ - --config.dataset_configs.number_train_examples_debug=${num_train_examples} \ - --config.batch_size=${batch_size} \ - --tpu=${tpu} \ - 
--config.lr_configs.base_learning_rate=${learning_rate} \ - --config.num_training_epochs=${num_training_epochs} \ - #--config.upstream_model=${model_type} \ - # --config.pretrained_backbone_configs.checkpoint_path=${pretrained_backbone} \ - -done -done -done -done -done \ No newline at end of file diff --git a/experimental/cityscapes/run_ensemble_eval.sh b/experimental/cityscapes/run_ensemble_eval.sh deleted file mode 100755 index 917c87049..000000000 --- a/experimental/cityscapes/run_ensemble_eval.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/sh - -# Run deterministic - -base_output_dir="outputs/ensemble" - -declare CITYSCAPES_TRAIN_SIZE=( - ["1"]="29" - ["10"]="298" - ["25"]="744" - ["50"]="1488" - ["75"]="2231" - ["100"]="2975" - ) - -if [ "$(uname)" = "Darwin" ] ; then - # Do something under Mac OS X platform -for split in 1 -do -for model_type in "scratch" -#for model_type in "deterministic" -do -for rng_seed in 0 -do - config_file="experiments/imagenet21k_segmenter_cityscapes2.py" - output_dir="${base_output_dir}/eval" - checkpoint_dir="${base_output_dir}" - num_cores=0 - tpu=False - use_gpu=False - train_split="train[:${split}%]" - num_train_examples=${CITYSCAPES_TRAIN_SIZE[$split]} - python ensemble_eval.py \ - --output_dir=${output_dir} \ - --checkpoint_dir=${checkpoint_dir} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - --config=${config_file} \ - #--config.rng_seed=${rng_seed} \ - #--config.dataset_configs.train_split=${train_split} \ - #--config.dataset_configs.number_train_examples_debug=${num_train_examples} \ - #--config.batch_size=8 \ - #--config.upstream_model=${model_type} \ - # - -done -done -done -elif [ "$(uname)" = "Linux" ]; then - echo "in Linux" - config_file='experiments/imagenet21k_segmenter_cityscapes1.py' - output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs13" - num_cores=8 - tpu='local' - use_gpu=False - python3 deterministic.py --output_dir=${output_dir} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - 
--config=${config_file} \ - --tpu=$tpu -fi diff --git a/experimental/cityscapes/run_l32_splits_vm.sh b/experimental/cityscapes/run_l32_splits_vm.sh deleted file mode 100755 index 2ce27bea1..000000000 --- a/experimental/cityscapes/run_l32_splits_vm.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash - -: ' -train segmenter model on cityscapes using different pretrained backbones for different splits - -Other parameters: number of training epochs, learning_rate, train_mode. - - -To List checkpoints run: -gsutil ls gs://ub-ekb/segmenter/cityscapes/run_splits_l32 - - - -' - -function get_config() -{ - local config_file_name="experiments/splits_l32/imagenet21k_segmenter_cityscapes_$1_$2.py" - echo "$config_file_name" -} - -function get_pretrained_backbone_path() -{ - local checkpoint_path="gs://ub-checkpoints/ImageNet21k_ViT-L32/$1/checkpoint.npz" - echo "$checkpoint_path" -} -num_cores=8 -tpu='local' -use_gpu=False - -for num_training_epochs in 100 #30 50 150 -do -for lr in "0.0001" # "0.03" "0.01" "0.003" "0.001" -do -for rng_seed in 1 2 3 4 -do -for train_mode in "deterministic" #"scratch" -do -for train_split in 100 # 75 50 25 10 -do -learning_rate=$( echo "$lr" | bc ) -config_file=$(get_config $train_mode $train_split) -run_name="${train_mode}_split${train_split}_seed${rng_seed}_lr${learning_rate}_step${num_training_epochs}" -output_dir_ckpt="gs://ub-ekb/segmenter/cityscapes/run_splits_vitl32/checkpoints/${run_name}" -pretrained_backbone=$(get_pretrained_backbone_path $rng_seed) -echo "${pretrained_backbone}" -echo "Running experiment ${output_dir_ckpt}" -#: ' -python3 deterministic.py --output_dir=${output_dir_ckpt} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - --config=${config_file} \ - --config.rng_seed=${rng_seed} \ - --config.lr_configs.base_learning_rate=${learning_rate} \ - --config.num_training_epochs=${num_training_epochs} \ - --tpu=$tpu -# --config.pretrained_backbone_configs.checkpoint_path=${pretrained_backbone} \ -#' -done -done -done -done -done 
diff --git a/experimental/cityscapes/run_pretrained.sh b/experimental/cityscapes/run_pretrained.sh deleted file mode 100755 index cafed9719..000000000 --- a/experimental/cityscapes/run_pretrained.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/sh - -# train cityscapes using segmenter with pretrained backbone -# supports 2 options to - - -if [ "$(uname)" = "Darwin" ] ; then - # Do something under Mac OS X platform - config_file='experiments/imagenet21k_segmenter_cityscapes1.py' - #config_file='experiments/splits_l32/imagenet21k_segmenter_cityscapes_deterministic_10.py' - #config_file='experiments/splits/imagenet21k_segmenter_cityscapes_deterministic_10.py' - - output_dir="/Users/ekellbuch/Projects/ood_segmentation/ub_ekb/experimental/cityscapes/outputs" - num_cores=0 - tpu=False - use_gpu=False - python deterministic.py --output_dir=${output_dir} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - --config=${config_file} \ - # --tpu=$tpu -elif [ "$(uname)" = "Linux" ]; then - echo "in Linux" - config_file='experiments/imagenet21k_segmenter_cityscapes1.py' - output_dir="/home/ekellbuch/ub_ekb/experimental/cityscapes/outputs13" - num_cores=8 - tpu='local' - use_gpu=False - python3 deterministic.py --output_dir=${output_dir} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - --config=${config_file} \ - --tpu=$tpu -fi diff --git a/experimental/cityscapes/run_pretrained_vm.sh b/experimental/cityscapes/run_pretrained_vm.sh deleted file mode 100755 index fe03ba4a2..000000000 --- a/experimental/cityscapes/run_pretrained_vm.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -# train cityscapes using segmenter with pretrained backbone -# supports 2 options to - -declare -A configfiles=( ["deterministic"]="experiments/imagenet21k_segmenter_cityscapes.py" ["sngp"]="experiments/imagenet21k_segmenter_cityscapes_sngp.py" ["scratch"]="experiments/segmenter_cityscapes.py") - -num_cores=8 -tpu='local' -use_gpu=False - -for config_mode in "sngp" "deterministic" "scratch" -do 
-config_file="${configfiles[$config_mode]}" -output_dir="gs://ub-ekb/segmenter/cityscapes/run0/$config_mode" -echo "${output_dir} ${config_file}" -python3 deterministic.py --output_dir=${output_dir} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - --config=${config_file} \ - --tpu=$tpu - -done diff --git a/experimental/cityscapes/run_pretrained_vm512.sh b/experimental/cityscapes/run_pretrained_vm512.sh deleted file mode 100755 index 4fac4a271..000000000 --- a/experimental/cityscapes/run_pretrained_vm512.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -# train cityscapes using segmenter with pretrained backbone -# supports options to try - -declare -A configfiles=( ["deterministic"]="experiments/imagenet21k_segmenter_cityscapes512.py" ["sngp"]="experiments/imagenet21k_segmenter_cityscapes_sngp512.py" ["scratch"]="experiments/segmenter_cityscapes512.py") - -num_cores=8 -tpu='local' -use_gpu=False - -for config_mode in "sngp" "scratch" #"deterministic" # "scratch" "sngp" -do -config_file="${configfiles[$config_mode]}" -output_dir="gs://ub-ekb/segmenter/cityscapes/run3/$config_mode" -echo "${output_dir} ${config_file}" -python3 deterministic.py --output_dir=${output_dir} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - --config=${config_file} \ - --tpu=$tpu - -done diff --git a/experimental/cityscapes/run_pretrained_vm_larger.sh b/experimental/cityscapes/run_pretrained_vm_larger.sh deleted file mode 100755 index 65d01c758..000000000 --- a/experimental/cityscapes/run_pretrained_vm_larger.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -# train cityscapes using segmenter with pretrained backbone -# supports options to try - -declare -A configfiles=( ["deterministic"]="experiments/imagenet21k_segmenter_cityscapes_larger.py" ["sngp"]="experiments/imagenet21k_segmenter_cityscapes_sngp.py" ["scratch"]="experiments/segmenter_cityscapes.py") - -num_cores=8 -tpu='local' -use_gpu=False - -for config_mode in "deterministic" # "scratch" "sngp" -do 
-config_file="${configfiles[$config_mode]}" -output_dir="gs://ub-ekb/segmenter/cityscapes/run2/$config_mode" -echo "${output_dir} ${config_file}" -python3 deterministic.py --output_dir=${output_dir} \ - --num_cores=$num_cores \ - --use_gpu=$use_gpu \ - --config=${config_file} \ - --tpu=$tpu - -done diff --git a/experimental/cityscapes/uncertainty_metrics.py b/experimental/cityscapes/uncertainty_metrics.py deleted file mode 100644 index 47469afa5..000000000 --- a/experimental/cityscapes/uncertainty_metrics.py +++ /dev/null @@ -1,205 +0,0 @@ -""" -Include uncertainty metrics -""" -import jax.numpy as jnp -from typing import Optional, Any, Tuple, Union - -import numpy as np - -from scenic.model_lib.base_models.model_utils import apply_weights - -from jax import lax - -# TODO(kellybuchanan): consolidate metric calculation as class -# TODO(kellybuchanan): support cases where mask is 0 - - -def calculate_num_patches_binary_maps( - binary_acc_map: jnp.ndarray, - binary_unc_map: jnp.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: - """ - Calculate conditional probabilities in confusion matrix given binary - accuracy and uncertainty maps - """ - # number of patches that are accurate and certain - n_ac = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 1), - jnp.equal(binary_unc_map, 0)), axis=(-1, -2) - ) - - # number of patches that are inaccurate and certain - n_ic = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 0), - jnp.equal(binary_unc_map, 0)), axis=(-1, -2) - ) - # number of patches that are inaccurate and uncertain - n_iu = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 0), - jnp.equal(binary_unc_map, 1)), axis=(-1, -2) - ) - - # number of patches that are accurate and uncertain - n_au = jnp.sum(jnp.logical_and(jnp.equal(binary_acc_map, 1), - jnp.equal(binary_unc_map, 1)), axis=(-1, -2) - ) - - return n_ac, n_ic, n_iu, n_au - - -def calculate_uncertainty_confusion_matrix( - logits: jnp.ndarray, - labels: jnp.ndarray, - weights: 
Optional[jnp.ndarray] = None, - accuracy_th: Optional[float] = 0.5, - uncertainty_th: Optional[float] = 0.5, - window_size: Optional[int] = 2) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: - """ - Calculate conditional probabilities - TODO(kellybuchanan): include weights for entropy calculation - """ - # --- - if labels.ndim == logits.ndim: # One-hot targets. - targets = jnp.argmax(labels, axis=-1) - else: - targets = labels - - preds = jnp.argmax(logits, axis=-1) - - # calculate binary accuracy map - correct = jnp.equal(preds, targets) - - # batch masking - if weights is not None: - correct = apply_weights(correct, weights) - - correct = correct.astype(jnp.float32) - - # A given patch is accurate if its acc > accuracy_threshold - binary_acc_map = reduce_2dmap(correct, window_size, accuracy_th).astype(jnp.float32) - - # Calculate uncertainty map - probs = jnp.exp(logits) / jnp.sum(jnp.exp(logits), -1, keepdims=True) - entropy = -jnp.sum(probs*jnp.log(probs), axis=-1).astype(jnp.float32) - - # A given patch is uncertain if its uncertainty > uncertainty_th - binary_unc_map = reduce_2dmap(entropy, window_size, uncertainty_th).astype(jnp.float32) - - # number of patches that are accurate and certain - n_ac, n_ic, n_iu, n_au = calculate_num_patches_binary_maps( - binary_acc_map, binary_unc_map) - - return n_ac, n_ic, n_iu, n_au - - -def calculate_puncert_inacc( - logits: jnp.ndarray, - labels: jnp.ndarray, - weights: Optional[jnp.ndarray] = None, - accuracy_th: Optional[float] = 0.5, - uncertainty_th: Optional[float] = 0.4, - window_size: Optional[int] = 2) -> jnp.ndarray: - """ - Calculate p(uncertain | inaccurate) - """ - - n_ac, n_ic, n_iu, n_au = calculate_uncertainty_confusion_matrix( - logits=logits, - labels=labels, - weights=weights, - accuracy_th=accuracy_th, - uncertainty_th=uncertainty_th, - window_size=window_size) - - # p(uncertain | innacurate) - p_tmp = n_iu / (n_ic + n_iu) - - # TODO: ignore cases where mask is 0 - p_tmp = 
jnp.nan_to_num(p_tmp) - - return p_tmp - - -def calculate_pacc_cert( - logits: jnp.ndarray, - labels: jnp.ndarray, - weights: Optional[jnp.ndarray] = None, - accuracy_th: Optional[float] = 0.5, - uncertainty_th: Optional[float] = 0.4, - window_size: Optional[int] = 2) -> jnp.ndarray: - """ - Calculate p(accurate|certain) - """ - # TODO(kellybuchanan): reconcile cases where there are no certain patches. - - n_ac, n_ic, n_iu, n_au = calculate_uncertainty_confusion_matrix( - logits=logits, - labels=labels, - weights=weights, - accuracy_th=accuracy_th, - uncertainty_th=uncertainty_th, - window_size=window_size) - - p_tmp = n_ac / (n_ac + n_ic) - - # TODO: ignore cases where mask is 0 - p_tmp = jnp.nan_to_num(p_tmp) - return p_tmp - - -def calculate_pavpu( - logits: jnp.ndarray, - labels: jnp.ndarray, - weights: Optional[jnp.ndarray] = None, - accuracy_th: Optional[float] = 0.5, - uncertainty_th: Optional[float] = 0.4, - window_size: Optional[int] = 2) -> jnp.ndarray: - """ - Calculate PavPu - """ - n_ac, n_ic, n_iu, n_au = calculate_uncertainty_confusion_matrix( - logits=logits, - labels=labels, - weights=weights, - accuracy_th=accuracy_th, - uncertainty_th=uncertainty_th, - window_size=window_size) - - # Patch accuracy vs Patch uncertainty - p_tmp = (n_ac + n_iu) / (n_ac + n_au + n_ic + n_iu) - - # TODO: ignore cases where mask is 0 - p_tmp = jnp.nan_to_num(p_tmp) - return p_tmp - - -def reduce_2dmap( - array_map: jnp.ndarray, - window_size: int = 4, - threshold: float = 0.5, - ) -> jnp.ndarray: - """ - Given a map, apply a 2d spatial strided convolution to avg adjacent values - """ - reduce_dims = 0 - - # Expand dims if necessary - if array_map.ndim == 3: - array_map = jnp.expand_dims(array_map, 0) - reduce_dims = 1 - - # Create a kernel - kernel = jnp.ones(array_map.shape[:-2] + (window_size, window_size)) - - # Convolve map with kernel - out = lax.conv(array_map, # lhs = NCHW image tensor - kernel, # rhs = OIHW conv kernel tensor - (window_size, window_size), # 
window strides - 'SAME') # padding mode - - # divide by window_size - out = jnp.divide(out, window_size*window_size) - - # binarize_map according to threshold - binary_map = jnp.greater(out, threshold) - - if reduce_dims: - binary_map = jnp.squeeze(binary_map, 0) - - return binary_map.astype(jnp.int32) From c0e09c62b53a352ea1310253971e5f8b5c86b191 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Thu, 29 Sep 2022 01:24:00 -0400 Subject: [PATCH 090/150] remove local changes not needed in main repository --- uncertainty_baselines/models/__init__.py | 10 --- uncertainty_baselines/models/segmenter_be.py | 2 +- uncertainty_baselines/models/vit.py | 38 +-------- .../models/vit_batchensemble.py | 34 +------- .../models/vit_batchensemble_test.py | 80 +------------------ 5 files changed, 8 insertions(+), 156 deletions(-) diff --git a/uncertainty_baselines/models/__init__.py b/uncertainty_baselines/models/__init__.py index 32a1e552b..48a45373f 100644 --- a/uncertainty_baselines/models/__init__.py +++ b/uncertainty_baselines/models/__init__.py @@ -102,16 +102,6 @@ logging.warning('Skipped Segmenter models due to NotFoundError.', exc_info=True) -try: - # Try to import Segmenter models. - from uncertainty_baselines.models.segmenter_be import segmenter_be_transformer -except ImportError: - logging.warning('Skipped Segmenter BE model due to ImportError.', exc_info=True) -except tf.errors.NotFoundError: - logging.warning('Skipped Segmenter BE model due to NotFoundError.', - exc_info=True) - - try: # Try to import models depending on tensorflow_models.official.nlp. from uncertainty_baselines.models import bert diff --git a/uncertainty_baselines/models/segmenter_be.py b/uncertainty_baselines/models/segmenter_be.py index 5fc319422..5d14961b0 100644 --- a/uncertainty_baselines/models/segmenter_be.py +++ b/uncertainty_baselines/models/segmenter_be.py @@ -206,4 +206,4 @@ def __call__(self, x: Array, *, train: bool, debug: bool = False): 'BE Input and output shapes do not match: %d vs. 
%d.', new_input_shape, x.shape[:-1]) - return x, out \ No newline at end of file + return x, out diff --git a/uncertainty_baselines/models/vit.py b/uncertainty_baselines/models/vit.py index fb5be41c8..77ce7aa58 100644 --- a/uncertainty_baselines/models/vit.py +++ b/uncertainty_baselines/models/vit.py @@ -112,9 +112,6 @@ class Encoder1DBlock(nn.Module): attention_dropout_rate: dropout for attention heads. deterministic: bool, deterministic or not (to apply dropout). num_heads: Number of heads in nn.MultiHeadDotProductAttention - stochastic_depth: probability of dropping a layer linearly grows from 0 to - the provided value. - """ mlp_dim: int @@ -122,26 +119,6 @@ class Encoder1DBlock(nn.Module): dtype: Dtype = jnp.float32 dropout_rate: float = 0.1 attention_dropout_rate: float = 0.1 - stochastic_depth: float = 0.0 - - def get_stochastic_depth_mask(self, x: jnp.ndarray, - deterministic: bool) -> jnp.ndarray: - """Generate the stochastic depth mask in order to apply layer-drop. - - Args: - x: Input tensor. - deterministic: Weather we are in the deterministic mode (e.g inference - time) or not. - - Returns: - Stochastic depth mask. - """ - if not deterministic and self.stochastic_depth: - shape = (x.shape[0],) + (1,) * (x.ndim - 1) - return jax.random.bernoulli( - self.make_rng('dropout'), self.stochastic_depth, shape) - else: - return 0.0 @nn.compact def __call__(self, inputs, *, deterministic): @@ -167,7 +144,7 @@ def __call__(self, inputs, *, deterministic): num_heads=self.num_heads, name='MultiHeadDotProductAttention_1')(x, x) x = nn.Dropout(rate=self.dropout_rate)(x, deterministic=deterministic) - x = x * (1.0 - self.get_stochastic_depth_mask(x, deterministic)) + inputs + x = x + inputs # MLP block. 
y = nn.LayerNorm(dtype=self.dtype, name='LayerNorm_2')(x) @@ -178,7 +155,7 @@ def __call__(self, inputs, *, deterministic): dropout_rate=self.dropout_rate)( y, deterministic=deterministic) - return y * (1.0 - self.get_stochastic_depth_mask(x, deterministic)) + x + return x + y class Encoder(nn.Module): @@ -190,12 +167,6 @@ class Encoder(nn.Module): num_heads: Number of heads in nn.MultiHeadDotProductAttention dropout_rate: dropout rate. attention_dropout_rate: dropout rate in self attention. - stochastic_depth: probability of dropping a layer linearly grows from 0 to - the provided value. Our implementation of stochastic depth follows timm - library, which does per-example layer dropping and uses independent - dropping patterns for each skip-connection. - dtype: Dtype of activations. - """ num_layers: int @@ -203,7 +174,6 @@ class Encoder(nn.Module): num_heads: int dropout_rate: float = 0.1 attention_dropout_rate: float = 0.1 - stochastic_depth: float = 0.0 @nn.compact def __call__(self, inputs, *, train): @@ -230,8 +200,6 @@ def __call__(self, inputs, *, train): mlp_dim=self.mlp_dim, dropout_rate=self.dropout_rate, attention_dropout_rate=self.attention_dropout_rate, - stochastic_depth=(lyr / max(self.num_layers - 1, 1)) * - self.stochastic_depth, name=f'encoderblock_{lyr}', num_heads=self.num_heads)( x, deterministic=not train) @@ -331,4 +299,4 @@ def vision_transformer(num_classes: int, hidden_size=hidden_size, representation_size=representation_size, classifier=classifier, - fix_base_model=fix_base_model) + fix_base_model=fix_base_model) \ No newline at end of file diff --git a/uncertainty_baselines/models/vit_batchensemble.py b/uncertainty_baselines/models/vit_batchensemble.py index 1663d7169..219ace0fd 100644 --- a/uncertainty_baselines/models/vit_batchensemble.py +++ b/uncertainty_baselines/models/vit_batchensemble.py @@ -98,26 +98,6 @@ class Encoder1DBlock(nn.Module): dtype: Optional[DType] = None dropout_rate: float = 0.0 attention_dropout_rate: float = 0.0 
- stochastic_depth: float = 0.0 - - def get_stochastic_depth_mask(self, x: jnp.ndarray, - deterministic: bool) -> jnp.ndarray: - """Generate the stochastic depth mask in order to apply layer-drop. - - Args: - x: Input tensor. - deterministic: Weather we are in the deterministic mode (e.g inference - time) or not. - - Returns: - Stochastic depth mask. - """ - if not deterministic and self.stochastic_depth: - shape = (x.shape[0],) + (1,) * (x.ndim - 1) - return jax.random.bernoulli( - self.make_rng('dropout'), self.stochastic_depth, shape) - else: - return 0.0 @nn.compact def __call__(self, @@ -152,7 +132,7 @@ def __call__(self, num_heads=self.num_heads, dropout_rate=self.attention_dropout_rate)(x, x) x = nn.Dropout(rate=self.dropout_rate)(x, deterministic=deterministic) - x = x * (1.0 - self.get_stochastic_depth_mask(x, deterministic)) + inputs + x = x + inputs # MLP block. y = nn.LayerNorm(dtype=self.dtype, name="LayerNorm_2")(x) @@ -164,7 +144,7 @@ def __call__(self, name="MlpBlock_3", dropout_rate=self.dropout_rate)(y, deterministic=deterministic) - return y * (1.0 - self.get_stochastic_depth_mask(x, deterministic)) + x + return x + y class BatchEnsembleEncoder(nn.Module): @@ -183,11 +163,6 @@ class BatchEnsembleEncoder(nn.Module): train: True if the module is used for training. be_layers: Sequence of layers where BE MLPs are included. If None, use BE MLP blocks in every other layer (1, 3, 5, ...). First layer is 0. - stochastic_depth: probability of dropping a layer linearly grows from 0 to - the provided value. Our implementation of stochastic depth follows timm - library, which does per-example layer dropping and uses independent - dropping patterns for each skip-connection. 
- """ num_layers: int mlp_dim: int @@ -200,7 +175,6 @@ class BatchEnsembleEncoder(nn.Module): attention_dropout_rate: float = 0.0 train: Optional[bool] = None be_layers: Optional[Sequence[int]] = None - stochastic_depth: float = 0.0 @nn.compact def __call__(self, @@ -238,8 +212,6 @@ def is_first_be_layer(lyr: int) -> bool: dtype=dtype, dropout_rate=self.dropout_rate, attention_dropout_rate=self.attention_dropout_rate, - stochastic_depth=(lyr / max(self.num_layers - 1, 1)) * - self.stochastic_depth, name=f"encoderblock_{lyr}") if lyr in be_layers: # We need to tile inputs before the first BE layer. @@ -376,4 +348,4 @@ def vision_transformer_be( representation_size=representation_size, classifier=classifier, head_kernel_init=head_kernel_init, - train=train) + train=train) \ No newline at end of file diff --git a/uncertainty_baselines/models/vit_batchensemble_test.py b/uncertainty_baselines/models/vit_batchensemble_test.py index 7e9ccba6f..28d84f272 100644 --- a/uncertainty_baselines/models/vit_batchensemble_test.py +++ b/uncertainty_baselines/models/vit_batchensemble_test.py @@ -24,86 +24,8 @@ import ml_collections import uncertainty_baselines as ub -import flax - - - -class VitTest(parameterized.TestCase): - - @parameterized.parameters( - ('gap', 3, 5987), - ) - def test_vision_transformer(self, classifier, representation_size, - expected_param_count): - # TODO(dusenberrymw): Clean this up once config dict is cleaned up in - # VisionTransformer. - def getList(parent, dict): - for key, value in dict.items(): - var_name = '{}/{}'.format(parent, key) - if isinstance(value, jax.numpy.ndarray): - print('{}, {}'.format(var_name, value.shape)) - else: - getList(var_name, value) - return dict.keys() - - DEBUG=1 #also visualize params for vit model - config = ml_collections.ConfigDict() - # Model parameters. 
- config.model = ml_collections.ConfigDict() - config.model.patches = ml_collections.ConfigDict() - config.model.patches.size = [16, 16] - config.model.hidden_size = 768 - config.model.representation_size = 768 - config.model.classifier = 'token' - config.model.transformer = ml_collections.ConfigDict() - config.model.transformer.num_layers = 12 - config.model.transformer.dropout_rate = 0.0 - config.model.transformer.mlp_dim = 3072 - config.model.transformer.num_heads = 12 - config.model.transformer.attention_dropout_rate = 0.0 - - num_examples = 2 - num_classes = 1000 - inputs = jnp.ones([num_examples, 224, 224, 3], jnp.float32) - - if DEBUG ==1: - model = ub.models.vision_transformer(num_classes=num_classes, **config.model) - - key = jax.random.PRNGKey(0) - variables = model.init(key, inputs, train=False) - - param_count = sum(p.size for p in jax.tree_flatten(variables)[0]) - print(param_count) - getList('variables', variables) - - logits, outputs = model.apply(variables, inputs, train=False) - self.assertEqual(logits.shape, (num_examples, num_classes)) - self.assertEqual( - set(outputs.keys()), - set(('stem', 'transformed', 'head_input', 'pre_logits', 'logits'))) - - # BatchEnsemble parameters. 
- config.model.transformer.be_layers = (9, 11) - config.model.transformer.ens_size = 3 - config.model.transformer.random_sign_init = 0.5 - config.fast_weight_lr_multiplier = 1.0 - - model = ub.models.PatchTransformerBE(num_classes=num_classes, **config.model) - - key = jax.random.PRNGKey(0) - variables = model.init(key, inputs, train=False) - - param_count = sum(p.size for p in jax.tree_flatten(variables)[0]) - print(param_count) - getList('variables', variables) - - logits, outputs = model.apply(variables, inputs, train=False) - self.assertEqual(logits.shape, (num_examples * config.model.transformer.ens_size, num_classes)) - - self.assertEqual( - set(outputs.keys()), set(('pre_logits',))) if __name__ == "__main__": - absltest.main() + absltest.main() \ No newline at end of file From 9e29d26b72cd242eb7acdde547450968e87cbc75 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Thu, 29 Sep 2022 01:26:46 -0400 Subject: [PATCH 091/150] remove unnecessary changes to main branch --- uncertainty_baselines/models/segmenter.py | 2 +- uncertainty_baselines/models/segmenter_be_test.py | 2 +- uncertainty_baselines/models/vit.py | 2 +- uncertainty_baselines/models/vit_batchensemble.py | 2 +- uncertainty_baselines/models/vit_batchensemble_test.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/uncertainty_baselines/models/segmenter.py b/uncertainty_baselines/models/segmenter.py index 535cd11ad..ded9f947f 100644 --- a/uncertainty_baselines/models/segmenter.py +++ b/uncertainty_baselines/models/segmenter.py @@ -152,4 +152,4 @@ def __call__(self, x: Array, *, train: bool, debug: bool = False): 'Input and output shapes do not match: %d vs. 
%d.', input_shape[:-1], x.shape[:-1]) - return x, out \ No newline at end of file + return x, out diff --git a/uncertainty_baselines/models/segmenter_be_test.py b/uncertainty_baselines/models/segmenter_be_test.py index dc2a2a647..ab019e81b 100644 --- a/uncertainty_baselines/models/segmenter_be_test.py +++ b/uncertainty_baselines/models/segmenter_be_test.py @@ -74,4 +74,4 @@ def test_segmenter_be_transformer(self, num_classes, hidden_size, img_h, if __name__ == '__main__': - absltest.main() \ No newline at end of file + absltest.main() diff --git a/uncertainty_baselines/models/vit.py b/uncertainty_baselines/models/vit.py index 77ce7aa58..d5a02d4db 100644 --- a/uncertainty_baselines/models/vit.py +++ b/uncertainty_baselines/models/vit.py @@ -299,4 +299,4 @@ def vision_transformer(num_classes: int, hidden_size=hidden_size, representation_size=representation_size, classifier=classifier, - fix_base_model=fix_base_model) \ No newline at end of file + fix_base_model=fix_base_model) diff --git a/uncertainty_baselines/models/vit_batchensemble.py b/uncertainty_baselines/models/vit_batchensemble.py index 219ace0fd..77f4ad09f 100644 --- a/uncertainty_baselines/models/vit_batchensemble.py +++ b/uncertainty_baselines/models/vit_batchensemble.py @@ -348,4 +348,4 @@ def vision_transformer_be( representation_size=representation_size, classifier=classifier, head_kernel_init=head_kernel_init, - train=train) \ No newline at end of file + train=train) diff --git a/uncertainty_baselines/models/vit_batchensemble_test.py b/uncertainty_baselines/models/vit_batchensemble_test.py index 28d84f272..61cbd5ad1 100644 --- a/uncertainty_baselines/models/vit_batchensemble_test.py +++ b/uncertainty_baselines/models/vit_batchensemble_test.py @@ -28,4 +28,4 @@ if __name__ == "__main__": - absltest.main() \ No newline at end of file + absltest.main() From 939446a279feebcdb0ca0a7bd3e855a6e077acdc Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Thu, 29 Sep 2022 11:05:54 -0400 Subject: [PATCH 092/150] 
add configs for runs with different seeds for segmenter and segmenter_gp for cityscapes and ade20k_ind --- .../configs/ade20k_ind/deterministic_seeds.py | 253 +++++++++++++++++ .../configs/ade20k_ind/gp_seeds.py | 267 ++++++++++++++++++ .../configs/cityscapes/deterministic_seeds.py | 238 ++++++++++++++++ .../configs/cityscapes/gp_seeds.py | 256 +++++++++++++++++ 4 files changed, 1014 insertions(+) create mode 100644 experimental/robust_segvit/configs/ade20k_ind/deterministic_seeds.py create mode 100644 experimental/robust_segvit/configs/ade20k_ind/gp_seeds.py create mode 100644 experimental/robust_segvit/configs/cityscapes/deterministic_seeds.py create mode 100644 experimental/robust_segvit/configs/cityscapes/gp_seeds.py diff --git a/experimental/robust_segvit/configs/ade20k_ind/deterministic_seeds.py b/experimental/robust_segvit/configs/ade20k_ind/deterministic_seeds.py new file mode 100644 index 000000000..d7e7468ef --- /dev/null +++ b/experimental/robust_segvit/configs/ade20k_ind/deterministic_seeds.py @@ -0,0 +1,253 @@ +# coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Train segmenter model on ade20k_ind. + +Compare performance across seeds. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +import os +import datetime + +_CITYSCAPES_FINE_TRAIN_SIZE = 2975 +_CITYSCAPES_COARSE_TRAIN_SIZE = 19998 + +_ADE20K_TRAIN_SIZE = 20210 +_PASCAL_VOC_TRAIN_SIZE = 10582 +_PASCAL_CONTEXT_TRAIN_SIZE = 4998 + +TRAIN_SIZES = { + 'cityscapes': _CITYSCAPES_FINE_TRAIN_SIZE, + 'ade20k': _ADE20K_TRAIN_SIZE, + 'ade20k_ind': _ADE20K_TRAIN_SIZE, + 'pascal_voc': _PASCAL_VOC_TRAIN_SIZE, + 'pascal_context': _PASCAL_CONTEXT_TRAIN_SIZE +} + +# Model specs. +LOAD_PRETRAINED_BACKBONE = True +BACKBONE_ORIGIN = 'vision_transformer' +VIT_SIZE = 'L' +STRIDE = 16 +RESNET_SIZE = None +CLASSIFIER = 'token' +target_size = (640, 640) +UPSTREAM_TASK = 'augreg+i21k+imagenet2012' + + +# Upstream +MODEL_PATHS = { + + # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 + ('vision_transformer', 'L', 16, None, 'token', 'i21k+imagenet2012'): + 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', + ('vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', +} + + +MODEL_PATH = MODEL_PATHS[(BACKBONE_ORIGIN, VIT_SIZE, STRIDE, RESNET_SIZE, + CLASSIFIER, UPSTREAM_TASK)] + +if VIT_SIZE == 'B': + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 +elif VIT_SIZE == 'L': + mlp_dim = 4096 + num_heads = 16 + num_layers = 24 + hidden_size = 1024 + +TRAIN_SAMPLES = 32 + + +def get_config(runlocal=''): + """Returns the configuration for ADE20k_ind segmentation.""" + + runlocal = bool(runlocal) + + config = ml_collections.ConfigDict() + config.experiment_name = 'ade20k_ind_deterministic_seeds' + + # Dataset. 
+ config.dataset_name = 'robust_segvit_segmentation' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 + + config.dataset_configs.train_split = 'train' + config.dataset_configs.name = 'ade20k_ind' + config.dataset_configs.dataset_name = '' # ood name flag to write in eval. + + # Model. + config.model_name = 'segvit' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit' + config.model.backbone.mlp_dim = mlp_dim + config.model.backbone.num_heads = num_heads + config.model.backbone.num_layers = num_layers + config.model.backbone.hidden_size = hidden_size + config.model.backbone.dropout_rate = 0.1 + config.model.backbone.attention_dropout_rate = 0.0 + config.model.backbone.classifier = CLASSIFIER + + # Decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'linear' + + # Training. + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = ml_collections.FieldReference(100) + config.batch_size = 32 + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # Learning rate. + config.num_train_examples = TRAIN_SIZES.get(config.dataset_configs.name) + config.steps_per_epoch = config.get_ref( + 'num_train_examples') // config.get_ref('batch_size') + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') + config.lr_configs.steps_per_cycle = config.get_ref( + 'num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 3e-5 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = ml_collections.ConfigDict() + config.pretrained_backbone_configs.checkpoint_format = BACKBONE_ORIGIN + config.pretrained_backbone_configs.checkpoint_path = MODEL_PATH + config.pretrained_backbone_configs.token_init = True + config.pretrained_backbone_configs.classifier = 'token' + config.pretrained_backbone_configs.backbone_type = 'vit' + + # Logging. + config.write_summary = True + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof. + config.checkpoint = True # Do checkpointing. + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = False # Debug mode during training. + config.debug_eval = False # Debug mode during eval. + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # Evaluation. + config.eval_configs = ml_collections.ConfigDict() + config.eval_configs.mode = 'standard' + config.eval_mode = False + config.eval_covariate_shift = False + config.eval_label_shift = False + config.model.input_shape = target_size + + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. 
+ config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. + + if runlocal: + config.count_flops = False + config.dataset_configs.train_target_size = (128, 128) + config.model.input_shape = config.dataset_configs.train_target_size + config.batch_size = 8 + config.num_training_epochs = 5 + config.warmup_steps = 0 + config.dataset_configs.train_split = f'train[:{TRAIN_SAMPLES}]' + config.dataset_configs.validation_split = f'validation[:{TRAIN_SAMPLES}]' + config.num_train_examples = TRAIN_SAMPLES + + return config + + +def checkpoint(hyper, backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task): + """Defines checkpoints for sweep.""" + overwrites = [] + if resnet_size is not None: + raise NotImplementedError('') + else: + overwrites.append( + hyper.sweep('config.model.patches', [{ + 'size': (stride, stride) + }])) + + if vit_size == 'B': + overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [3072])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [12])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [12])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [768])) + elif vit_size == 'L': + overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [4096])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [16])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [24])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [1024])) + else: + raise NotImplementedError('') + + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_format', + [backbone_origin])) + 
overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_path', [ + MODEL_PATHS[(backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task)] + ])) + + return hyper.product(overwrites) + + +def get_sweep(hyper): + """Defines the hyper-parameters sweeps for doing grid search.""" + + seeds = hyper.sweep('config.rng_seed', range(0, 5)) + + + return hyper.product([seeds]) diff --git a/experimental/robust_segvit/configs/ade20k_ind/gp_seeds.py b/experimental/robust_segvit/configs/ade20k_ind/gp_seeds.py new file mode 100644 index 000000000..5c69a5728 --- /dev/null +++ b/experimental/robust_segvit/configs/ade20k_ind/gp_seeds.py @@ -0,0 +1,267 @@ +# coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Train segmenter model on ade20k_ind. + +Compare performance across seeds. + +""" +# pylint: enable=line-too-long + +import ml_collections +import os +import datetime + +_CITYSCAPES_FINE_TRAIN_SIZE = 2975 +_CITYSCAPES_COARSE_TRAIN_SIZE = 19998 + +_ADE20K_TRAIN_SIZE = 20210 +_PASCAL_VOC_TRAIN_SIZE = 10582 +_PASCAL_CONTEXT_TRAIN_SIZE = 4998 + +TRAIN_SIZES = { + 'cityscapes': _CITYSCAPES_FINE_TRAIN_SIZE, + 'ade20k': _ADE20K_TRAIN_SIZE, + 'ade20k_ind': _ADE20K_TRAIN_SIZE, + 'pascal_voc': _PASCAL_VOC_TRAIN_SIZE, + 'pascal_context': _PASCAL_CONTEXT_TRAIN_SIZE +} + +# Model specs. 
+LOAD_PRETRAINED_BACKBONE = True +BACKBONE_ORIGIN = 'vision_transformer' +VIT_SIZE = 'L' +STRIDE = 16 +RESNET_SIZE = None +CLASSIFIER = 'token' +target_size = (640, 640) +UPSTREAM_TASK = 'augreg+i21k+imagenet2012' + + +# Upstream +MODEL_PATHS = { + + # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 + ('vision_transformer', 'L', 16, None, 'token', 'i21k+imagenet2012'): + 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', + ('vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', +} + + +MODEL_PATH = MODEL_PATHS[(BACKBONE_ORIGIN, VIT_SIZE, STRIDE, RESNET_SIZE, + CLASSIFIER, UPSTREAM_TASK)] + +if VIT_SIZE == 'B': + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 +elif VIT_SIZE == 'L': + mlp_dim = 4096 + num_heads = 16 + num_layers = 24 + hidden_size = 1024 + +TRAIN_SAMPLES = 32 + + +def get_config(runlocal=''): + """Returns the configuration for ADE20k_ind segmentation.""" + + runlocal = bool(runlocal) + + config = ml_collections.ConfigDict() + config.experiment_name = 'ade20k_ind_segmenter_gp_seeds' + + # Dataset. + config.dataset_name = 'robust_segvit_segmentation' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 + + config.dataset_configs.train_split = 'train' + config.dataset_configs.name = 'ade20k_ind' + config.dataset_configs.dataset_name = '' # ood name flag to write in eval. + + # Model. 
+ config.model_name = 'segvit' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit' + config.model.backbone.mlp_dim = mlp_dim + config.model.backbone.num_heads = num_heads + config.model.backbone.num_layers = num_layers + config.model.backbone.hidden_size = hidden_size + config.model.backbone.dropout_rate = 0.1 + config.model.backbone.attention_dropout_rate = 0.0 + config.model.backbone.classifier = CLASSIFIER + + # Decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'gp' + + # GP layer params + config.model.decoder.gp_layer = ml_collections.ConfigDict() + config.model.decoder.gp_layer.covmat_kwargs = ml_collections.ConfigDict() + config.model.decoder.gp_layer.covmat_kwargs.ridge_penalty = 1. + # Disable momentum in order to use exact covariance update for finetuning. + # Disable to allow exact cov update. + config.model.decoder.gp_layer.covmat_kwargs.momentum = 0.99 + config.model.decoder.mean_field_factor = 9. + # Additional params + config.model.decoder.gp_layer.normalize_input = True + config.model.decoder.gp_layer.hidden_kwargs = ml_collections.ConfigDict() + config.model.decoder.gp_layer.hidden_kwargs.feature_scale = 1. + + # Training. + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = ml_collections.FieldReference(100) + config.batch_size = 32 + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # Learning rate. 
+ config.num_train_examples = TRAIN_SIZES.get(config.dataset_configs.name) + config.steps_per_epoch = config.get_ref( + 'num_train_examples') // config.get_ref('batch_size') + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') + config.lr_configs.steps_per_cycle = config.get_ref( + 'num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 3e-5 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = ml_collections.ConfigDict() + config.pretrained_backbone_configs.checkpoint_format = BACKBONE_ORIGIN + config.pretrained_backbone_configs.checkpoint_path = MODEL_PATH + config.pretrained_backbone_configs.token_init = True + config.pretrained_backbone_configs.classifier = 'token' + config.pretrained_backbone_configs.backbone_type = 'vit' + + # Logging. + config.write_summary = True + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof. + config.checkpoint = True # Do checkpointing. + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = False # Debug mode during training. + config.debug_eval = False # Debug mode during eval. + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # Evaluation. 
+ config.eval_configs = ml_collections.ConfigDict() + config.eval_configs.mode = 'standard' + config.eval_mode = False + config.eval_covariate_shift = False + config.eval_label_shift = False + config.model.input_shape = target_size + + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. + + if runlocal: + config.count_flops = False + config.dataset_configs.train_target_size = (128, 128) + config.model.input_shape = config.dataset_configs.train_target_size + config.batch_size = 8 + config.num_training_epochs = 5 + config.warmup_steps = 0 + config.dataset_configs.train_split = f'train[:{TRAIN_SAMPLES}]' + config.dataset_configs.validation_split = f'validation[:{TRAIN_SAMPLES}]' + config.num_train_examples = TRAIN_SAMPLES + + return config + + +def checkpoint(hyper, backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task): + """Defines checkpoints for sweep.""" + overwrites = [] + if resnet_size is not None: + raise NotImplementedError('') + else: + overwrites.append( + hyper.sweep('config.model.patches', [{ + 'size': (stride, stride) + }])) + + if vit_size == 'B': + overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [3072])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [12])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [12])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [768])) + elif vit_size == 'L': + 
overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [4096])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [16])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [24])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [1024])) + else: + raise NotImplementedError('') + + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_format', + [backbone_origin])) + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_path', [ + MODEL_PATHS[(backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task)] + ])) + + return hyper.product(overwrites) + + +def get_sweep(hyper): + """Defines the hyper-parameters sweeps for doing grid search.""" + + seeds = hyper.sweep('config.rng_seed', range(0, 5)) + + return hyper.product([seeds]) + + diff --git a/experimental/robust_segvit/configs/cityscapes/deterministic_seeds.py b/experimental/robust_segvit/configs/cityscapes/deterministic_seeds.py new file mode 100644 index 000000000..10629b9ce --- /dev/null +++ b/experimental/robust_segvit/configs/cityscapes/deterministic_seeds.py @@ -0,0 +1,238 @@ +# coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Train segmenter model on cityscapes dataset. + +Compare performance across seeds. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +import os +import datetime + +_CITYSCAPES_TRAIN_SIZE = 2975 +_CITYSCAPES_TRAIN_SIZE_SPLIT = 146 + +# Model specs. +LOAD_PRETRAINED_BACKBONE = True +BACKBONE_ORIGIN = 'vision_transformer' +VIT_SIZE = 'L' +STRIDE = 16 +RESNET_SIZE = None +CLASSIFIER = 'token' +target_size = (768, 768) +UPSTREAM_TASK = 'augreg+i21k+imagenet2012' + + +# Upstream +MODEL_PATHS = { + + # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 + ('vision_transformer', 'L', 16, None, 'token', 'i21k+imagenet2012'): + 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', + ('vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', +} + + +MODEL_PATH = MODEL_PATHS[(BACKBONE_ORIGIN, VIT_SIZE, STRIDE, RESNET_SIZE, + CLASSIFIER, UPSTREAM_TASK)] + +if VIT_SIZE == 'B': + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 +elif VIT_SIZE == 'L': + mlp_dim = 4096 + num_heads = 16 + num_layers = 24 + hidden_size = 1024 + + +def get_config(runlocal=''): + """Returns the configuration for Cityscapes segmentation.""" + + runlocal = bool(runlocal) + + config = ml_collections.ConfigDict() + config.experiment_name = 'cityscapes_segmenter_seeds' + + # Dataset. + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = 'train' + config.dataset_configs.dataset_name = '' # name of ood dataset to evaluate + + # Model. 
+ config.model_name = 'segvit' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit' + config.model.backbone.mlp_dim = mlp_dim + config.model.backbone.num_heads = num_heads + config.model.backbone.num_layers = num_layers + config.model.backbone.hidden_size = hidden_size + config.model.backbone.dropout_rate = 0.1 + config.model.backbone.attention_dropout_rate = 0.0 + config.model.backbone.classifier = CLASSIFIER + + # Decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'linear' + + # Training. + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = ml_collections.FieldReference(100) + config.batch_size = 64 + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # Learning rate. + config.steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.get_ref( + 'batch_size') + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') + config.lr_configs.steps_per_cycle = config.get_ref( + 'num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = ml_collections.ConfigDict() + config.pretrained_backbone_configs.checkpoint_format = BACKBONE_ORIGIN + config.pretrained_backbone_configs.checkpoint_path = MODEL_PATH + config.pretrained_backbone_configs.token_init = True + config.pretrained_backbone_configs.classifier = 'token' + config.pretrained_backbone_configs.backbone_type = 'vit' + + # Logging. + config.write_summary = True + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof. + config.checkpoint = True # Do checkpointing. + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = False # Debug mode during training. + config.debug_eval = False # Debug mode during eval. + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # Evaluation. + config.eval_mode = False + config.eval_configs = ml_collections.ConfigDict() + config.eval_configs.mode = 'standard' + config.eval_covariate_shift = True + config.eval_label_shift = True + config.model.input_shape = target_size + + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. 
+ config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. + + if runlocal: + config.count_flops = False + config.target_size = (128, 128) + config.batch_size = 8 + config.num_training_epochs = 5 + config.warmup_steps = 0 + config.dataset_configs.train_split = 'train[:5%]' + config.steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.get_ref( + 'batch_size') + + return config + + +def checkpoint(hyper, backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task): + """Defines checkpoints for sweep.""" + overwrites = [] + if resnet_size is not None: + raise NotImplementedError('') + else: + overwrites.append( + hyper.sweep('config.model.patches', [{'size': (stride, stride)}])) + + if vit_size == 'B': + overwrites.append( + hyper.sweep('config.model.backbone.mlp_dim', [3072])) + overwrites.append( + hyper.sweep('config.model.backbone.num_heads', [12])) + overwrites.append( + hyper.sweep('config.model.backbone.num_layers', [12])) + overwrites.append( + hyper.sweep('config.model.backbone.hidden_size', [768])) + elif vit_size == 'L': + overwrites.append( + hyper.sweep('config.model.backbone.mlp_dim', [4096])) + overwrites.append( + hyper.sweep('config.model.backbone.num_heads', [16])) + overwrites.append( + hyper.sweep('config.model.backbone.num_layers', [24])) + overwrites.append( + hyper.sweep('config.model.backbone.hidden_size', [1024])) + else: + raise NotImplementedError('') + + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_format', + [backbone_origin])) + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_path', [ + MODEL_PATHS[(backbone_origin, 
vit_size, stride, resnet_size, + classifier, upstream_task)] + ])) + + return hyper.product(overwrites) + + +def get_sweep(hyper): + """Defines the hyper-parameters sweeps for doing grid search.""" + + seeds = hyper.sweep('config.rng_seed', range(0, 5)) + + + return hyper.product([seeds]) + diff --git a/experimental/robust_segvit/configs/cityscapes/gp_seeds.py b/experimental/robust_segvit/configs/cityscapes/gp_seeds.py new file mode 100644 index 000000000..04d9f24d3 --- /dev/null +++ b/experimental/robust_segvit/configs/cityscapes/gp_seeds.py @@ -0,0 +1,256 @@ +# coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Train segmenter model on cityscapes dataset. + +Compare performance across seeds. + +""" +# pylint: enable=line-too-long + +import ml_collections +import os +import datetime + +_CITYSCAPES_TRAIN_SIZE = 2975 +_CITYSCAPES_TRAIN_SIZE_SPLIT = 146 + +# Model specs. 
+LOAD_PRETRAINED_BACKBONE = True +BACKBONE_ORIGIN = 'vision_transformer' +VIT_SIZE = 'L' +STRIDE = 16 +RESNET_SIZE = None +CLASSIFIER = 'token' +target_size = (768, 768) +UPSTREAM_TASK = 'augreg+i21k+imagenet2012' + + +# Upstream +MODEL_PATHS = { + + # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 + ('vision_transformer', 'L', 16, None, 'token', 'i21k+imagenet2012'): + 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', + ('vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', +} + + +MODEL_PATH = MODEL_PATHS[(BACKBONE_ORIGIN, VIT_SIZE, STRIDE, RESNET_SIZE, + CLASSIFIER, UPSTREAM_TASK)] + +if VIT_SIZE == 'B': + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 +elif VIT_SIZE == 'L': + mlp_dim = 4096 + num_heads = 16 + num_layers = 24 + hidden_size = 1024 + + +def get_config(runlocal=''): + """Returns the configuration for Cityscapes segmentation.""" + + runlocal = bool(runlocal) + + config = ml_collections.ConfigDict() + config.experiment_name = 'cityscapes_segmenter_gp_seeds' + + # Dataset. + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = 'train' + config.dataset_configs.dataset_name = '' # name of ood dataset to evaluate + + # Model. 
+ config.model_name = 'segvit' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit' + config.model.backbone.mlp_dim = mlp_dim + config.model.backbone.num_heads = num_heads + config.model.backbone.num_layers = num_layers + config.model.backbone.hidden_size = hidden_size + config.model.backbone.dropout_rate = 0.1 + config.model.backbone.attention_dropout_rate = 0.0 + config.model.backbone.classifier = CLASSIFIER + + # Decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'gp' + + # GP layer params + config.model.decoder.gp_layer = ml_collections.ConfigDict() + config.model.decoder.gp_layer.covmat_kwargs = ml_collections.ConfigDict() + config.model.decoder.gp_layer.covmat_kwargs.ridge_penalty = 1. + # Disable momentum in order to use exact covariance update for finetuning. + # Disable to allow exact cov update. + config.model.decoder.gp_layer.covmat_kwargs.momentum = 0.99 + config.model.decoder.mean_field_factor = 3. + # Additional params + config.model.decoder.gp_layer.normalize_input = True + config.model.decoder.gp_layer.hidden_kwargs = ml_collections.ConfigDict() + config.model.decoder.gp_layer.hidden_kwargs.feature_scale = 1. + + # Training. + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = ml_collections.FieldReference(100) + config.batch_size = 64 + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # Learning rate. + config.steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.get_ref( + 'batch_size') + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') + config.lr_configs.steps_per_cycle = config.get_ref( + 'num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = ml_collections.ConfigDict() + config.pretrained_backbone_configs.checkpoint_format = BACKBONE_ORIGIN + config.pretrained_backbone_configs.checkpoint_path = MODEL_PATH + config.pretrained_backbone_configs.token_init = True + config.pretrained_backbone_configs.classifier = 'token' + config.pretrained_backbone_configs.backbone_type = 'vit' + + # Logging. + config.write_summary = True + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof. + config.checkpoint = True # Do checkpointing. + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = False # Debug mode during training. + config.debug_eval = False # Debug mode during eval. + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # Evaluation. + config.eval_mode = False + config.eval_configs = ml_collections.ConfigDict() + config.eval_configs.mode = 'standard' + config.eval_covariate_shift = True + config.eval_label_shift = True + config.model.input_shape = target_size + + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. 
+ config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. + + if runlocal: + config.count_flops = False + config.target_size = (128, 128) + config.batch_size = 8 + config.num_training_epochs = 5 + config.warmup_steps = 0 + config.dataset_configs.train_split = 'train[:5%]' + config.steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.get_ref( + 'batch_size') + + return config + + +def checkpoint(hyper, backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task): + """Defines checkpoints for sweep.""" + overwrites = [] + if resnet_size is not None: + raise NotImplementedError('') + else: + overwrites.append( + hyper.sweep('config.model.patches', [{'size': (stride, stride)}])) + + if vit_size == 'B': + overwrites.append( + hyper.sweep('config.model.backbone.mlp_dim', [3072])) + overwrites.append( + hyper.sweep('config.model.backbone.num_heads', [12])) + overwrites.append( + hyper.sweep('config.model.backbone.num_layers', [12])) + overwrites.append( + hyper.sweep('config.model.backbone.hidden_size', [768])) + elif vit_size == 'L': + overwrites.append( + hyper.sweep('config.model.backbone.mlp_dim', [4096])) + overwrites.append( + hyper.sweep('config.model.backbone.num_heads', [16])) + overwrites.append( + hyper.sweep('config.model.backbone.num_layers', [24])) + overwrites.append( + hyper.sweep('config.model.backbone.hidden_size', [1024])) + else: + raise NotImplementedError('') + + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_format', + [backbone_origin])) + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_path', [ + MODEL_PATHS[(backbone_origin, 
vit_size, stride, resnet_size, + classifier, upstream_task)] + ])) + + return hyper.product(overwrites) + + +def get_sweep(hyper): + """Defines the hyper-parameters sweeps for doing grid search.""" + + parameters = [ + hyper.sweep('config.model.decoder.gp_layer.normalize_input', + [True, False]), + hyper.sweep('config.model.decoder.mean_field_factor', + hyper.discrete(range(1, 10))), + hyper.sweep('config.model.decoder.gp_layer.hidden_kwargs.feature_scale', + [1.0, 2.0]), + ] + + return hyper.product(parameters) From 973dcd92226da3c9384f0622102c6e57d0d5e67b Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 3 Oct 2022 11:24:23 -0400 Subject: [PATCH 093/150] update uncertainty matrix calculation to support multi host and multi device --- experimental/robust_segvit/custom_models.py | 5 +++++ .../robust_segvit/custom_segmentation_trainer_test.py | 1 + experimental/robust_segvit/run_gp_street_hazards.yaml | 2 +- experimental/robust_segvit/uncertainty_metrics.py | 3 ++- 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/experimental/robust_segvit/custom_models.py b/experimental/robust_segvit/custom_models.py index f4b2b68a0..0cab33f38 100644 --- a/experimental/robust_segvit/custom_models.py +++ b/experimental/robust_segvit/custom_models.py @@ -391,6 +391,11 @@ def global_unc_metrics_fn( assert isinstance(all_unc_confusion_mats, list) # List of eval batches. cm = np.sum(all_unc_confusion_mats, axis=0) # Sum over eval batches. 
+ if cm.ndim == 2: # [batch_size, 4] + pass + elif cm.ndim == 3: # [num_devices, batch_size per device, 4] + cm = np.sum(cm, axis=0) # sum over devices + assert cm.ndim == 2, ('Expecting uncertainty confusion matrix to have shape ' '[batch_size, 4], got ' f'{cm.shape}.') diff --git a/experimental/robust_segvit/custom_segmentation_trainer_test.py b/experimental/robust_segvit/custom_segmentation_trainer_test.py index 41410f059..5a6d06d42 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer_test.py +++ b/experimental/robust_segvit/custom_segmentation_trainer_test.py @@ -265,6 +265,7 @@ def test_unc_confusion_matrix(self, seed, masked_fraction): window_size=window_size, uncertainty_measure='softmax', ), axis_name='batch') + unc_confusion_matrix = [ cm_pmapped(labels=labels, logits=logits_, weights=masks) for labels, logits_, masks in diff --git a/experimental/robust_segvit/run_gp_street_hazards.yaml b/experimental/robust_segvit/run_gp_street_hazards.yaml index 417303f0b..f6bbdf6b9 100755 --- a/experimental/robust_segvit/run_gp_street_hazards.yaml +++ b/experimental/robust_segvit/run_gp_street_hazards.yaml @@ -15,7 +15,7 @@ parameters: config.wandb_entity : value: ${{entity}} config.batch_size: - value: 16 + value: 24 config.model.decoder.mean_field_factor: values: [1, 2, 5, 6, 10] diff --git a/experimental/robust_segvit/uncertainty_metrics.py b/experimental/robust_segvit/uncertainty_metrics.py index 6327ee3d6..7f0d3c66d 100644 --- a/experimental/robust_segvit/uncertainty_metrics.py +++ b/experimental/robust_segvit/uncertainty_metrics.py @@ -57,7 +57,6 @@ def calculate_num_patches_binary_maps( unc_confusion_matrix = jnp.stack((n_ac, n_ic, n_iu, n_au), axis=-1) - unc_confusion_matrix = unc_confusion_matrix[jnp.newaxis, ...] # Dummy batch dim. 
return unc_confusion_matrix @@ -85,6 +84,7 @@ def get_pavpu(unc_confusion_matrix): def get_uncertainty_confusion_matrix( + *, logits: jnp.ndarray, labels: jnp.ndarray, uncertainty_measure: str = 'softmax', @@ -153,6 +153,7 @@ def get_uncertainty_confusion_matrix( unc_confusion_matrix = calculate_num_patches_binary_maps( binary_acc_map, binary_unc_map) + unc_confusion_matrix = unc_confusion_matrix[jnp.newaxis, ...] # Dummy batch dim. return unc_confusion_matrix From 2f13f76b55faf3d2fcb6e2872a803f715e9be3df Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 3 Oct 2022 11:27:16 -0400 Subject: [PATCH 094/150] add deterministic call for wandb --- .../run_deterministic_cityscapes.yaml | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100755 experimental/robust_segvit/run_deterministic_cityscapes.yaml diff --git a/experimental/robust_segvit/run_deterministic_cityscapes.yaml b/experimental/robust_segvit/run_deterministic_cityscapes.yaml new file mode 100755 index 000000000..0707f38f6 --- /dev/null +++ b/experimental/robust_segvit/run_deterministic_cityscapes.yaml @@ -0,0 +1,32 @@ +name: deterministic_cityscapes +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/cityscapes/deterministic.py" + - "--output_dir" + - "gs://ub-ekb/segmenter/cityscapes/deterministic" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file From 3a4022aeb59e64a2f09619c2a85cffabb686f23c Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 3 Oct 2022 17:01:42 -0400 Subject: [PATCH 095/150] update batch size for deterministic city --- experimental/robust_segvit/run_deterministic_cityscapes.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/experimental/robust_segvit/run_deterministic_cityscapes.yaml b/experimental/robust_segvit/run_deterministic_cityscapes.yaml index 0707f38f6..a9bb2eabd 100755 --- a/experimental/robust_segvit/run_deterministic_cityscapes.yaml +++ b/experimental/robust_segvit/run_deterministic_cityscapes.yaml @@ -14,6 +14,8 @@ parameters: value: ${{project}} config.wandb_entity : value: ${{entity}} + config.batch_size: + value: 16 From 63c8156e3fae299debf0bcbd3fe44d61b4731c2b Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 3 Oct 2022 18:56:48 -0400 Subject: [PATCH 096/150] add custom_segmentation_trainer with store_logits code --- .../custom_segmentation_trainer.py | 81 +++++++++++++++++-- 1 file changed, 73 insertions(+), 8 deletions(-) diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index 11bf0c33d..bbf288610 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -164,7 +164,21 @@ def evaluate(train_state: train_utils.TrainState, # Evaluate global metrics on one of the hosts (lead_host), but given # intermediate values collected from all hosts. 
- for _ in range(steps_per_eval): + # store logits + store_logits = config.eval_configs.get('store_logits', False) + + if store_logits: + store_logits_fname = os.path.join(workdir, prefix, "logits", "val.h5py") + f = h5py.File(store_logits_fname, 'w', libver='latest') + f.swmr_mode = True # single write multi-read + input_shape = dataset.meta_data['input_shape'][1:3] + num_classes = dataset.meta_data['num_classes'] + num_eval_examples = int(steps_per_eval * config.batch_size) + logits_out = f.create_dataset('logits', (num_eval_examples,) + input_shape + (num_classes,)) + inputs_out = f.create_dataset('inputs', (num_eval_examples,) + input_shape + (3,)) + labels_out = f.create_dataset('labels', (num_eval_examples,) + input_shape) + + for step_ in range(steps_per_eval): eval_batch = next(dataset.valid_iter) e_batch, e_logits, e_metrics, confusion_matrix, unc_confusion_matrix = eval_step_pmapped( train_state=train_state, batch=eval_batch) @@ -175,6 +189,16 @@ def evaluate(train_state: train_utils.TrainState, eval_all_unc_confusion_mats.append( to_cpu(unc_confusion_matrix, all_gather=True)) + if store_logits: + start_idx = step_ * config.batch_size + end_idx = start_idx + config.batch_size + logits_out[start_idx:end_idx] = e_logits + inputs_out[start_idx:end_idx] = e_batch['inputs'] + labels_out[start_idx:end_idx] = e_batch['labels'] + + if store_logits: + f.close() + # Compute global metrics eval_global_metrics_summary = {} if lead_host and global_metrics_fn is not None: @@ -226,6 +250,7 @@ def evaluate_ood( writer: metric_writers.MetricWriter, lead_host: Any, prefix: str = 'valid', + workdir: str ='', **kwargs, ) -> Dict[str, Any]: """Model evaluator. 
@@ -254,6 +279,20 @@ def evaluate_ood( auc_online = kwargs.pop('auc_online', False) + # store logits + store_logits = config.eval_configs.get('store_logits', False) + + if store_logits: + store_logits_fname = os.path.join(workdir, prefix, "logits", "val.h5py") + f = h5py.File(store_logits_fname, 'w', libver='latest') + f.swmr_mode = True # single write multi-read + input_shape = dataset.meta_data['input_shape'][1:3] + num_classes = dataset.meta_data['num_classes'] + num_eval_examples = int(steps_per_eval * config.batch_size) + logits_out = f.create_dataset('logits', (num_eval_examples,) + input_shape + (num_classes,)) + inputs_out = f.create_dataset('inputs', (num_eval_examples,) + input_shape + (3,)) + labels_out = f.create_dataset('labels', (num_eval_examples,) + input_shape) + if auc_online: # TODO(kellybuchanan): check split of data across devices. # initialize metrics: ideally in each device in each host/process/machine @@ -264,11 +303,20 @@ def evaluate_ood( auc_roc = tf.keras.metrics.AUC(curve='ROC') # Loop through each machine: - for _ in range(steps_per_eval): + for step_ in range(steps_per_eval): eval_batch = next(dataset.valid_iter) e_batch, e_logits = eval_step_pmapped( train_state=train_state, batch=eval_batch) + + if store_logits: + start_idx = step_ * config.batch_size + end_idx = start_idx + config.batch_size + logits_out[start_idx:end_idx] = e_logits + inputs_out[start_idx:end_idx] = e_batch['inputs'] + labels_out[start_idx:end_idx] = e_batch['labels'] + + # In eval_step_pmapped we have not used all gather, so each metric is in # each device and we should be able to compute devices separately @@ -279,6 +327,8 @@ def evaluate_ood( auc_pr.update_state( e_batch['label'], ood_score, sample_weight=e_batch['batch_mask']) + if store_logits: + f.close() # How to communicate metrics across hosts? # Ideally we can collect auc_metrics per host, merge them, compute result. # However, we cannot pass arbitraty class. 
@@ -337,6 +387,7 @@ def combine_states(all_auc_states): eval_summary = {'auroc': float(auc_roc.result().numpy()), 'auprc': float(auc_pr.result().numpy()), } + else: eval_logits = [] eval_ood_masks = [] @@ -592,10 +643,12 @@ def eval_step( # Collect predictions and batches from all hosts. # use all_gather to copy and replicate across all hosts # we skip doing this for batch and logits to save memory + # unless we want to store the logits # predictions = jnp.argmax(logits, axis=-1) # predictions = jax.lax.all_gather(predictions, 'batch') - # logits = jax.lax.all_gather(logits, 'batch') - # batch = jax.lax.all_gather(batch, 'batch') + if config.eval_configs.get('store_logits', False): + logits = jax.lax.all_gather(logits, 'batch') + batch = jax.lax.all_gather(batch, 'batch') confusion_matrix = jax.lax.all_gather(confusion_matrix, 'batch') unc_confusion_matrix = jax.lax.all_gather(unc_confusion_matrix, 'batch') @@ -660,9 +713,10 @@ def eval_step_baseline( # Collect predictions and batches from all hosts. # use all_gather to copy and replicate across all hosts # we can skip doing this for batch and logits to save memory - # is the OOM in tpu or cpu? - # batch = jax.lax.all_gather(batch, 'batch') - # logits = jax.lax.all_gather(logits, 'batch') + # jis the OOM in tpu or cpu? + if config.eval_configs.get('store_logits', False): + batch = jax.lax.all_gather(batch, 'batch') + logits = jax.lax.all_gather(logits, 'batch') return batch, logits @@ -1172,7 +1226,6 @@ def evaluate_ood_step( Returns: eval_summary: summary evaluation """ - del workdir eval_summary = {} if config.get('eval_covariate_shift', False): @@ -1222,6 +1275,7 @@ def evaluate_ood_step( lead_host=lead_host, global_metrics_fn=global_metrics_fn, global_unc_metrics_fn=global_unc_metrics_fn, + workdir=workdir, ) # Wait until computations are done before exiting. 
@@ -1274,6 +1328,7 @@ def evaluate_ood_step( eval_step_pmapped=eval_step_ood_pmapped, writer=writer, lead_host=lead_host, + workdir=workdir, ) # Wait until computations are done before exiting. @@ -1290,6 +1345,7 @@ def evaluate_cityscapes_c( lead_host: Any, global_metrics_fn: Any, global_unc_metrics_fn: Any, + workdir: str = None, ) -> Dict[str, Any]: """Evaluate cityscapes-c dataset. @@ -1343,6 +1399,7 @@ def evaluate_cityscapes_c( global_metrics_fn=global_metrics_fn, global_unc_metrics_fn=global_unc_metrics_fn, prefix=dataset.meta_data['prefix'], + workdir=workdir, ) local_list.append(eval_summary) @@ -1373,6 +1430,7 @@ def evaluate_fishyscapes( eval_step_pmapped: Any, writer: metric_writers.MetricWriter, lead_host: Any, + workdir: str = '', ) -> Dict[str, Any]: """Evaluate Fishyscapes dataset. @@ -1422,6 +1480,7 @@ def evaluate_fishyscapes( writer=writer, lead_host=lead_host, prefix=dataset.meta_data['prefix'], + workdir=workdir, **config.get('eval_robustness_configs', {}), ) @@ -1450,6 +1509,7 @@ def evaluate_ade20k_ood_open( eval_step_pmapped: Any, writer: metric_writers.MetricWriter, lead_host: Any, + workdir: str = '', ) -> Dict[str, Any]: """Evaluate ADE20k OOD dataset. @@ -1496,6 +1556,7 @@ def evaluate_ade20k_ood_open( writer=writer, lead_host=lead_host, prefix=dataset.meta_data['prefix'], + workdir=workdir, **config.get('eval_robustness_configs', {}), ) @@ -1522,6 +1583,7 @@ def evaluate_ade20k_corrupted( lead_host: Any, global_metrics_fn: Any, global_unc_metrics_fn: Any, + workdir : str, ) -> Dict[str, Any]: """Evaluate Ade20k-C dataset. 
@@ -1575,6 +1637,7 @@ def evaluate_ade20k_corrupted( global_metrics_fn=global_metrics_fn, global_unc_metrics_fn=global_unc_metrics_fn, prefix=dataset.meta_data['prefix'], + workdir=workdir, ) local_list.append(eval_summary) @@ -1605,6 +1668,7 @@ def evaluate_street_hazards_ood_open( eval_step_pmapped: Any, writer: metric_writers.MetricWriter, lead_host: Any, + workdir: str, ) -> Dict[str, Any]: """Evaluate StreetHazards OOD dataset. @@ -1651,6 +1715,7 @@ def evaluate_street_hazards_ood_open( writer=writer, lead_host=lead_host, prefix=dataset.meta_data['prefix'], + workdir=workdir, **config.get('eval_robustness_configs', {}), ) From a3dc36b6963fdadfd3c28eb85359f6dfa1b90e48 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 4 Oct 2022 07:23:16 -0400 Subject: [PATCH 097/150] add segmm torch model eval --- .../configs/cityscapes/torch_eval.py | 209 ++++++++++++++++++ 1 file changed, 209 insertions(+) create mode 100644 experimental/robust_segvit/configs/cityscapes/torch_eval.py diff --git a/experimental/robust_segvit/configs/cityscapes/torch_eval.py b/experimental/robust_segvit/configs/cityscapes/torch_eval.py new file mode 100644 index 000000000..62d2a7a85 --- /dev/null +++ b/experimental/robust_segvit/configs/cityscapes/torch_eval.py @@ -0,0 +1,209 @@ +import ml_collections +import os +import datetime + +_CITYSCAPES_TRAIN_SIZE = 2975 +_CITYSCAPES_TRAIN_SIZE_SPLIT = 146 + +# Model specs. 
+CHECKPOINT_ORIGIN = 'torch-segmm' +VIT_SIZE = 'L' +STRIDE = 16 +RESNET_SIZE = None +CLASSIFIER = 'token' +target_size = (768, 768) +EXPERIMENTID = 'torch-segmm-1' + +# Upstream +CHECKPOINT_PATHS = { + ('torch-segmm', 'L', 16, None, 'token', 'torch-segmm-1'): + 'gs://ub-ekb/seg_l16_linear/checkpoint_model.npy', +} + + +CHECKPOINT_PATH = CHECKPOINT_PATHS[(CHECKPOINT_ORIGIN, VIT_SIZE, STRIDE, + RESNET_SIZE, CLASSIFIER, EXPERIMENTID)] + +if VIT_SIZE == 'B': + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 +elif VIT_SIZE == 'L': + mlp_dim = 4096 + num_heads = 16 + num_layers = 24 + hidden_size = 1024 + + +def get_config(runlocal=''): + """Returns the configuration for Cityscapes segmentation.""" + + runlocal = bool(runlocal) + + config = ml_collections.ConfigDict() + config.experiment_name = 'cityscapes_segmenter_torch_eval' + + # Dataset. + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = (1024, 2048) + config.dataset_configs.train_split = 'train' + config.dataset_configs.dataset_name = '' # name of ood dataset to evaluate + + # Model. + config.model_name = 'segvit' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit' + config.model.backbone.mlp_dim = mlp_dim + config.model.backbone.num_heads = num_heads + config.model.backbone.num_layers = num_layers + config.model.backbone.hidden_size = hidden_size + config.model.backbone.dropout_rate = 0.0 + config.model.backbone.attention_dropout_rate = 0.0 + config.model.backbone.classifier = CLASSIFIER + + # Decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'linear' + + # Training. 
+ config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = ml_collections.FieldReference(100) + config.batch_size = 64 + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # Learning rate. + config.steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.get_ref( + 'batch_size') + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') + config.lr_configs.steps_per_cycle = config.get_ref( + 'num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # Logging. + config.write_summary = True + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof. + config.checkpoint = True # Do checkpointing. + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = False # Debug mode during training. + config.debug_eval = False # Debug mode during eval. + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # Evaluation. 
+ config.eval_mode = True + config.eval_configs = ml_collections.ConfigDict() + config.eval_configs.mode = 'segmm' + config.eval_configs.window_stride = 512 + config.model.input_shape = target_size + + # Eval parameters for robustness + config.eval_label_shift = True + config.eval_covariate_shift = True + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'nmlogit' + config.eval_robustness_configs.num_top_k = 1 + + # Load checkpoint + config.checkpoint_configs = ml_collections.ConfigDict() + config.checkpoint_configs.checkpoint_format = CHECKPOINT_ORIGIN + config.checkpoint_configs.checkpoint_path = CHECKPOINT_PATH + config.checkpoint_configs.classifier = 'token' + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
+ + if runlocal: + config.count_flops = False + config.target_size = (128, 128) + config.batch_size = 8 + config.num_training_epochs = 5 + config.warmup_steps = 0 + config.dataset_configs.train_split = 'train[:5%]' + config.steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.get_ref( + 'batch_size') + + return config + + +def checkpoint(hyper, backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task): + """Defines checkpoints for sweep.""" + overwrites = [] + if resnet_size is not None: + raise NotImplementedError('') + else: + overwrites.append( + hyper.sweep('config.model.patches', [{'size': (stride, stride)}])) + + if vit_size == 'B': + overwrites.append( + hyper.sweep('config.model.backbone.mlp_dim', [3072])) + overwrites.append( + hyper.sweep('config.model.backbone.num_heads', [12])) + overwrites.append( + hyper.sweep('config.model.backbone.num_layers', [12])) + overwrites.append( + hyper.sweep('config.model.backbone.hidden_size', [768])) + elif vit_size == 'L': + overwrites.append( + hyper.sweep('config.model.backbone.mlp_dim', [4096])) + overwrites.append( + hyper.sweep('config.model.backbone.num_heads', [16])) + overwrites.append( + hyper.sweep('config.model.backbone.num_layers', [24])) + overwrites.append( + hyper.sweep('config.model.backbone.hidden_size', [1024])) + else: + raise NotImplementedError('') + + overwrites.append( + hyper.sweep('config.checkpoint_configs.checkpoint_format', + [backbone_origin])) + overwrites.append( + hyper.sweep('config.checkpoint_configs.checkpoint_path', [ + CHECKPOINT_PATHS[(backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task)] + ])) + + return hyper.product(overwrites) + + +def get_sweep(hyper): + """Defines the parameters used to compare multiple metrics during eval.""" + + checkpoints = hyper.chainit([ + checkpoint(hyper, 'ub', 'L', 16, None, 'token', 'torch-segmm-1'), + ]) + + return hyper.product([checkpoints]) From 48fec99883b91df2e64d848bc01afcfe11e6da9a Mon Sep 17 
00:00:00 2001 From: Kelly Buchanan Date: Tue, 4 Oct 2022 07:31:16 -0400 Subject: [PATCH 098/150] update be_eval to load checkpoint even when running locally --- .../robust_segvit/configs/cityscapes/be_eval.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/experimental/robust_segvit/configs/cityscapes/be_eval.py b/experimental/robust_segvit/configs/cityscapes/be_eval.py index a15182949..308ae3765 100644 --- a/experimental/robust_segvit/configs/cityscapes/be_eval.py +++ b/experimental/robust_segvit/configs/cityscapes/be_eval.py @@ -153,6 +153,12 @@ def get_config(runlocal=''): config.eval_robustness_configs.method_name = 'msp' config.eval_robustness_configs.num_top_k = 5 + # Load checkpoint + config.checkpoint_configs = ml_collections.ConfigDict() + config.checkpoint_configs.checkpoint_format = CHECKPOINT_ORIGIN + config.checkpoint_configs.checkpoint_path = CHECKPOINT_PATH + config.checkpoint_configs.classifier = 'token' + if runlocal: config.count_flops = False config.target_size = (128, 128) @@ -162,12 +168,6 @@ def get_config(runlocal=''): config.dataset_configs.train_split = 'train[:5%]' config.steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.get_ref( 'batch_size') - else: - # Load checkpoint - config.checkpoint_configs = ml_collections.ConfigDict() - config.checkpoint_configs.checkpoint_format = CHECKPOINT_ORIGIN - config.checkpoint_configs.checkpoint_path = CHECKPOINT_PATH - config.checkpoint_configs.classifier = 'token' return config From c756e02c96de8025a699eb7701b2b61950d23d49 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 4 Oct 2022 07:58:41 -0400 Subject: [PATCH 099/150] add configuration to eval models on cityscapes --- .../configs/cityscapes/be_eval.py | 26 +- .../configs/cityscapes/deterministic_eval.py | 228 ++++++++++++++++ .../configs/cityscapes/gp_eval.py | 241 +++++++++++++++++ .../configs/cityscapes/het_eval.py | 245 ++++++++++++++++++ experimental/robust_segvit/run_eval_local.sh | 37 +++ 5 files 
changed, 771 insertions(+), 6 deletions(-) create mode 100644 experimental/robust_segvit/configs/cityscapes/deterministic_eval.py create mode 100644 experimental/robust_segvit/configs/cityscapes/gp_eval.py create mode 100644 experimental/robust_segvit/configs/cityscapes/het_eval.py create mode 100755 experimental/robust_segvit/run_eval_local.sh diff --git a/experimental/robust_segvit/configs/cityscapes/be_eval.py b/experimental/robust_segvit/configs/cityscapes/be_eval.py index 308ae3765..0415a17b4 100644 --- a/experimental/robust_segvit/configs/cityscapes/be_eval.py +++ b/experimental/robust_segvit/configs/cityscapes/be_eval.py @@ -14,14 +14,14 @@ # limitations under the License. # pylint: disable=line-too-long -r"""Train segmenter model on cityscapes dataset. - -Compare performance from deterministic upstream checkpoints. +r"""Evaluate segmenter_be model on cityscapes dataset. """ # pylint: enable=line-too-long import ml_collections +import os +import datetime _CITYSCAPES_TRAIN_SIZE = 2975 _CITYSCAPES_TRAIN_SIZE_SPLIT = 146 @@ -33,12 +33,14 @@ RESNET_SIZE = None CLASSIFIER = 'token' target_size = (768, 768) -EXPERIMENTID = '43838585-16' +EXPERIMENTID = '45338505-1' # Upstream CHECKPOINT_PATHS = { ('ub', 'L', 16, None, 'token', '43838585-16'): 'gs://ub-ekb/checkpoints_to_upload/cityscapes/43838585-16', + ('ub', 'L', 16, None, 'token', '45338505-1'): + 'gs://ub-checkpoints/45338505-cityscapes_segmenter_be/1', } @@ -143,6 +145,7 @@ def get_config(runlocal=''): config.eval_configs = ml_collections.ConfigDict() config.eval_configs.mode = 'segmm' config.eval_configs.window_stride = 512 + config.eval_configs.store_logits = False config.model.input_shape = target_size # Eval parameters for robustness @@ -150,8 +153,8 @@ def get_config(runlocal=''): config.eval_covariate_shift = True config.eval_robustness_configs = ml_collections.ConfigDict() config.eval_robustness_configs.auc_online = True - config.eval_robustness_configs.method_name = 'msp' - 
config.eval_robustness_configs.num_top_k = 5 + config.eval_robustness_configs.method_name = 'nmlogit' + config.eval_robustness_configs.num_top_k = 1 # Load checkpoint config.checkpoint_configs = ml_collections.ConfigDict() @@ -159,6 +162,17 @@ def get_config(runlocal=''): config.checkpoint_configs.checkpoint_path = CHECKPOINT_PATH config.checkpoint_configs.classifier = 'token' + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. + if runlocal: config.count_flops = False config.target_size = (128, 128) diff --git a/experimental/robust_segvit/configs/cityscapes/deterministic_eval.py b/experimental/robust_segvit/configs/cityscapes/deterministic_eval.py new file mode 100644 index 000000000..d028957a8 --- /dev/null +++ b/experimental/robust_segvit/configs/cityscapes/deterministic_eval.py @@ -0,0 +1,228 @@ +# coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Eval segmenter model trained on cityscapes dataset. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +import os +import datetime + +_CITYSCAPES_TRAIN_SIZE = 2975 +_CITYSCAPES_TRAIN_SIZE_SPLIT = 146 + +# Model specs. +VIT_SIZE = 'L' +STRIDE = 16 +RESNET_SIZE = None +CLASSIFIER = 'token' +target_size = (768, 768) + + +CHECKPOINT_ORIGIN = 'ub' +EXPERIMENTID = '45337813-1' + +# Upstream +CHECKPOINT_PATHS = { + ('ub', 'L', 16, None, 'token', '45337813-1'): + 'gs://ub-checkpoints/45337813-cityscapes_segmenter_pretrained/1', +} + + +CHECKPOINT_PATH = CHECKPOINT_PATHS[(CHECKPOINT_ORIGIN, VIT_SIZE, STRIDE, + RESNET_SIZE, CLASSIFIER, EXPERIMENTID)] + +if VIT_SIZE == 'B': + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 +elif VIT_SIZE == 'L': + mlp_dim = 4096 + num_heads = 16 + num_layers = 24 + hidden_size = 1024 + + +def get_config(runlocal=''): + """Returns the configuration for Cityscapes segmentation.""" + + runlocal = bool(runlocal) + + config = ml_collections.ConfigDict() + config.experiment_name = 'cityscapes_segmenter_eval' + + # Dataset. + config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = (1024, 2048) + config.dataset_configs.train_split = 'train' + config.dataset_configs.dataset_name = '' # name of ood dataset to evaluate + + # Model. 
+ config.model_name = 'segvit' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit' + config.model.backbone.mlp_dim = mlp_dim + config.model.backbone.num_heads = num_heads + config.model.backbone.num_layers = num_layers + config.model.backbone.hidden_size = hidden_size + config.model.backbone.dropout_rate = 0.0 + config.model.backbone.attention_dropout_rate = 0.0 + config.model.backbone.classifier = CLASSIFIER + + # Decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'linear' + + # Training. + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = ml_collections.FieldReference(100) + config.batch_size = 64 + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # Learning rate. + config.steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.get_ref( + 'batch_size') + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') + config.lr_configs.steps_per_cycle = config.get_ref( + 'num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # Logging. + config.write_summary = True + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof. + config.checkpoint = True # Do checkpointing. 
+ config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = False # Debug mode during training. + config.debug_eval = False # Debug mode during eval. + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # Evaluation. + config.eval_mode = True + config.eval_configs = ml_collections.ConfigDict() + config.eval_configs.mode = 'segmm' + config.eval_configs.window_stride = 512 + config.eval_configs.store_logits = False + config.model.input_shape = target_size + + # Eval parameters for robustness + config.eval_label_shift = True + config.eval_covariate_shift = True + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'nmlogit' + config.eval_robustness_configs.num_top_k = 1 + + # Load checkpoint + config.checkpoint_configs = ml_collections.ConfigDict() + config.checkpoint_configs.checkpoint_format = CHECKPOINT_ORIGIN + config.checkpoint_configs.checkpoint_path = CHECKPOINT_PATH + config.checkpoint_configs.classifier = 'token' + + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
+ + if runlocal: + config.count_flops = False + config.target_size = (128, 128) + config.batch_size = 8 + config.num_training_epochs = 5 + config.warmup_steps = 0 + config.dataset_configs.train_split = 'train[:5%]' + config.steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.get_ref( + 'batch_size') + + return config + + +def checkpoint(hyper, backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task): + """Defines checkpoints for sweep.""" + overwrites = [] + if resnet_size is not None: + raise NotImplementedError('') + else: + overwrites.append( + hyper.sweep('config.model.patches', [{'size': (stride, stride)}])) + + if vit_size == 'B': + overwrites.append( + hyper.sweep('config.model.backbone.mlp_dim', [3072])) + overwrites.append( + hyper.sweep('config.model.backbone.num_heads', [12])) + overwrites.append( + hyper.sweep('config.model.backbone.num_layers', [12])) + overwrites.append( + hyper.sweep('config.model.backbone.hidden_size', [768])) + elif vit_size == 'L': + overwrites.append( + hyper.sweep('config.model.backbone.mlp_dim', [4096])) + overwrites.append( + hyper.sweep('config.model.backbone.num_heads', [16])) + overwrites.append( + hyper.sweep('config.model.backbone.num_layers', [24])) + overwrites.append( + hyper.sweep('config.model.backbone.hidden_size', [1024])) + else: + raise NotImplementedError('') + + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_format', + [backbone_origin])) + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_path', [ + CHECKPOINT_PATHS[(backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task)] + ])) + + return hyper.product(overwrites) + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/robust_segvit/configs/cityscapes/gp_eval.py b/experimental/robust_segvit/configs/cityscapes/gp_eval.py new file mode 100644 index 000000000..fa72060d2 --- /dev/null +++ 
b/experimental/robust_segvit/configs/cityscapes/gp_eval.py @@ -0,0 +1,241 @@ +# coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Evaluate segmenter_gp model on cityscapes dataset. + +""" +# pylint: enable=line-too-long + +import ml_collections +import os +import datetime + +_CITYSCAPES_TRAIN_SIZE = 2975 +_CITYSCAPES_TRAIN_SIZE_SPLIT = 146 + +# Model specs. +VIT_SIZE = 'L' +STRIDE = 16 +RESNET_SIZE = None +CLASSIFIER = 'token' +target_size = (768, 768) + +CHECKPOINT_ORIGIN = 'ub' +EXPERIMENTID = '45338722-1' + +# Upstream +CHECKPOINT_PATHS = { + ('ub', 'L', 16, None, 'token', '45338722-1'): + 'gs://ub-checkpoints/ub-checkpoints/45338722-cityscapes_segmenter_gp_hyper/1', +} + + +CHECKPOINT_PATH = CHECKPOINT_PATHS[(CHECKPOINT_ORIGIN, VIT_SIZE, STRIDE, + RESNET_SIZE, CLASSIFIER, EXPERIMENTID)] + + +if VIT_SIZE == 'B': + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 +elif VIT_SIZE == 'L': + mlp_dim = 4096 + num_heads = 16 + num_layers = 24 + hidden_size = 1024 + + +def get_config(runlocal=''): + """Returns the configuration for Cityscapes segmentation.""" + + runlocal = bool(runlocal) + + config = ml_collections.ConfigDict() + config.experiment_name = 'cityscapes_segmenter_gp_eval' + + # Dataset. 
+ config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = (1024, 2048) + config.dataset_configs.train_split = 'train' + config.dataset_configs.dataset_name = '' # name of ood dataset to evaluate + + # Model. + config.model_name = 'segvit' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit' + config.model.backbone.mlp_dim = mlp_dim + config.model.backbone.num_heads = num_heads + config.model.backbone.num_layers = num_layers + config.model.backbone.hidden_size = hidden_size + config.model.backbone.dropout_rate = 0.0 + config.model.backbone.attention_dropout_rate = 0.0 + config.model.backbone.classifier = CLASSIFIER + + # Decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'gp' + + # GP layer params + config.model.decoder.gp_layer = ml_collections.ConfigDict() + config.model.decoder.gp_layer.covmat_kwargs = ml_collections.ConfigDict() + config.model.decoder.gp_layer.covmat_kwargs.ridge_penalty = 1. + # Disable momentum in order to use exact covariance update for finetuning. + # Disable to allow exact cov update. + config.model.decoder.gp_layer.covmat_kwargs.momentum = 0.99 + config.model.decoder.mean_field_factor = 1. + # Additional params + config.model.decoder.gp_layer.normalize_input = True + config.model.decoder.gp_layer.hidden_kwargs = ml_collections.ConfigDict() + config.model.decoder.gp_layer.hidden_kwargs.feature_scale = 1. + + # Training. 
+ config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = ml_collections.FieldReference(100) + config.batch_size = 64 + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # Learning rate. + config.steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.get_ref( + 'batch_size') + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') + config.lr_configs.steps_per_cycle = config.get_ref( + 'num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # Logging. + config.write_summary = True + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof. + config.checkpoint = True # Do checkpointing. + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = False # Debug mode during training. + config.debug_eval = False # Debug mode during eval. + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # Evaluation. 
+ config.eval_mode = True + config.eval_configs = ml_collections.ConfigDict() + config.eval_configs.mode = 'segmm' + config.eval_configs.window_stride = 512 + config.eval_configs.store_logits = False + config.model.input_shape = target_size + + # Eval parameters for robustness + config.eval_label_shift = True + config.eval_covariate_shift = True + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'nmlogit' + config.eval_robustness_configs.num_top_k = 1 + + # Load checkpoint + config.checkpoint_configs = ml_collections.ConfigDict() + config.checkpoint_configs.checkpoint_format = CHECKPOINT_ORIGIN + config.checkpoint_configs.checkpoint_path = CHECKPOINT_PATH + config.checkpoint_configs.classifier = 'token' + + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
+ + if runlocal: + config.count_flops = False + config.target_size = (128, 128) + config.batch_size = 8 + config.num_training_epochs = 5 + config.warmup_steps = 0 + config.dataset_configs.train_split = 'train[:5%]' + config.steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.get_ref( + 'batch_size') + + return config + + +def checkpoint(hyper, backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task): + """Defines checkpoints for sweep.""" + overwrites = [] + if resnet_size is not None: + raise NotImplementedError('') + else: + overwrites.append( + hyper.sweep('config.model.patches', [{'size': (stride, stride)}])) + + if vit_size == 'B': + overwrites.append( + hyper.sweep('config.model.backbone.mlp_dim', [3072])) + overwrites.append( + hyper.sweep('config.model.backbone.num_heads', [12])) + overwrites.append( + hyper.sweep('config.model.backbone.num_layers', [12])) + overwrites.append( + hyper.sweep('config.model.backbone.hidden_size', [768])) + elif vit_size == 'L': + overwrites.append( + hyper.sweep('config.model.backbone.mlp_dim', [4096])) + overwrites.append( + hyper.sweep('config.model.backbone.num_heads', [16])) + overwrites.append( + hyper.sweep('config.model.backbone.num_layers', [24])) + overwrites.append( + hyper.sweep('config.model.backbone.hidden_size', [1024])) + else: + raise NotImplementedError('') + + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_format', + [backbone_origin])) + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_path', [ + CHECKPOINT_PATHS[(backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task)] + ])) + + return hyper.product(overwrites) + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/robust_segvit/configs/cityscapes/het_eval.py b/experimental/robust_segvit/configs/cityscapes/het_eval.py new file mode 100644 index 000000000..f9c2863fa --- /dev/null +++ 
b/experimental/robust_segvit/configs/cityscapes/het_eval.py @@ -0,0 +1,245 @@ +# coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Evaluate segmenter_het model on cityscapes dataset. + +Compare performance from deterministic upstream checkpoints. + +""" +# pylint: enable=line-too-long + +import ml_collections +import os +import datetime + +_CITYSCAPES_TRAIN_SIZE = 2975 +_CITYSCAPES_TRAIN_SIZE_SPLIT = 146 + +# Model specs. +VIT_SIZE = 'L' +STRIDE = 16 +RESNET_SIZE = None +CLASSIFIER = 'token' +target_size = (768, 768) + + +CHECKPOINT_ORIGIN = 'ub' +EXPERIMENTID = '45338794-1' + +# Upstream +CHECKPOINT_PATHS = { + ('ub', 'L', 16, None, 'token', '45338794-1'): + 'gs://ub-checkpoints/45338794-cityscapes_segmenter_het_base/1', +} + + +CHECKPOINT_PATH = CHECKPOINT_PATHS[(CHECKPOINT_ORIGIN, VIT_SIZE, STRIDE, + RESNET_SIZE, CLASSIFIER, EXPERIMENTID)] + +if VIT_SIZE == 'B': + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 +elif VIT_SIZE == 'L': + mlp_dim = 4096 + num_heads = 16 + num_layers = 24 + hidden_size = 1024 + + +def get_config(runlocal=''): + """Returns the configuration for Cityscapes segmentation.""" + + runlocal = bool(runlocal) + + config = ml_collections.ConfigDict() + config.experiment_name = 'cityscapes_segmenter_het_eval' + + # Dataset. 
+ config.dataset_name = 'cityscapes' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = (1024, 2048) + config.dataset_configs.train_split = 'train' + config.dataset_configs.dataset_name = '' # name of ood dataset to evaluate + + # Model. + config.model_name = 'segvit' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit' + config.model.backbone.mlp_dim = mlp_dim + config.model.backbone.num_heads = num_heads + config.model.backbone.num_layers = num_layers + config.model.backbone.hidden_size = hidden_size + config.model.backbone.dropout_rate = 0.0 + config.model.backbone.attention_dropout_rate = 0.0 + config.model.backbone.classifier = CLASSIFIER + + # Decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'het' + + # Het layer params + # temp: wide sweep [0.15, 0.3, 0.5, 0.75, 1.0, 1.5, 2.0] + config.model.decoder.temperature = 1.0 + # efficient low rank approx ~ FxK where K is the classes. False for K<20. + config.model.decoder.param_efficient = False + # F as a low rank approx of KxK matrix has num_factors: + # imagenet~15, jft~50, cifar~6, cityscapes~sweep(5-10). + config.model.decoder.num_factors = 5 + # mc_samples: use as much as can be afforded, ideally > 10. + config.model.decoder.mc_samples = 1000 + config.model.decoder.return_locs = False + # turn on to run an approx on KHW x KHW instead of KxK. + config.model.decoder.share_samples_across_batch = False + + # Training. 
+ config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = ml_collections.FieldReference(100) + config.batch_size = 64 + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # Learning rate. + config.steps_per_epoch = _CITYSCAPES_TRAIN_SIZE // config.get_ref( + 'batch_size') + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') + config.lr_configs.steps_per_cycle = config.get_ref( + 'num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # Logging. + config.write_summary = True + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof. + config.checkpoint = True # Do checkpointing. + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = False # Debug mode during training. + config.debug_eval = False # Debug mode during eval. + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # Evaluation. 
+ config.eval_mode = True + config.eval_configs = ml_collections.ConfigDict() + config.eval_configs.mode = 'segmm' + config.eval_configs.window_stride = 512 + config.eval_configs.store_logits = False + config.model.input_shape = target_size + + # Eval parameters for robustness + config.eval_label_shift = True + config.eval_covariate_shift = True + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'nmlogit' + config.eval_robustness_configs.num_top_k = 1 + + # Load checkpoint + config.checkpoint_configs = ml_collections.ConfigDict() + config.checkpoint_configs.checkpoint_format = CHECKPOINT_ORIGIN + config.checkpoint_configs.checkpoint_path = CHECKPOINT_PATH + config.checkpoint_configs.classifier = 'token' + + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
+ + if runlocal: + config.count_flops = False + config.target_size = (128, 128) + config.batch_size = 8 + config.num_training_epochs = 5 + config.warmup_steps = 0 + config.dataset_configs.train_split = 'train[:5%]' + config.steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.get_ref( + 'batch_size') + + return config + + +def checkpoint(hyper, backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task): + """Defines checkpoints for sweep.""" + overwrites = [] + if resnet_size is not None: + raise NotImplementedError('') + else: + overwrites.append( + hyper.sweep('config.model.patches', [{'size': (stride, stride)}])) + + if vit_size == 'B': + overwrites.append( + hyper.sweep('config.model.backbone.mlp_dim', [3072])) + overwrites.append( + hyper.sweep('config.model.backbone.num_heads', [12])) + overwrites.append( + hyper.sweep('config.model.backbone.num_layers', [12])) + overwrites.append( + hyper.sweep('config.model.backbone.hidden_size', [768])) + elif vit_size == 'L': + overwrites.append( + hyper.sweep('config.model.backbone.mlp_dim', [4096])) + overwrites.append( + hyper.sweep('config.model.backbone.num_heads', [16])) + overwrites.append( + hyper.sweep('config.model.backbone.num_layers', [24])) + overwrites.append( + hyper.sweep('config.model.backbone.hidden_size', [1024])) + else: + raise NotImplementedError('') + + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_format', + [backbone_origin])) + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_path', [ + CHECKPOINT_PATHS[(backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task)] + ])) + + return hyper.product(overwrites) + + +def get_sweep(hyper): + return hyper.product([]) + diff --git a/experimental/robust_segvit/run_eval_local.sh b/experimental/robust_segvit/run_eval_local.sh new file mode 100755 index 000000000..16fed5f45 --- /dev/null +++ b/experimental/robust_segvit/run_eval_local.sh @@ -0,0 +1,37 @@ 
+#!/bin/bash + +# evaluate model using wandb +#wandb sweep run_toy_mac.yaml +# before make sure we can run code vanilla version: + +DATASET='ade20k_ind' # or cityscapes +DATASET='street_hazards' + +# Parameters +DATASET='cityscapes' +model='deterministic' + +base_output_dir="gs://ub-ekb/segmenter/${DATASET}/${model}_eval" + +# Debug on Mac OS X platform +use_gpu=False +if [ "$(uname)" = "Darwin" ] ; then +tpu=False +num_cores=1 +batch_size=1 +elif [ "$(uname)" = "Linux" ]; then +tpu='local' +num_cores=8 +batch_size=8 +fi + +config_file="configs/${DATASET}/${model}_eval.py:runlocal" +run_name="${model}_eval" +output_dir="${base_output_dir}/${run_name}" +python deterministic.py \ +--output_dir=${output_dir} \ +--num_cores=$num_cores \ +--use_gpu=$use_gpu \ +--config=${config_file} \ +--config.batch_size=${batch_size} \ +--tpu=${tpu} \ From f9484f29ae076e610cdc643faa89685e7aeeebf4 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 4 Oct 2022 08:52:47 -0400 Subject: [PATCH 100/150] add wandb yaml files to call eval experiments --- .../robust_segvit/run_cityscapes_be_eval.yaml | 36 +++++++++++++++++++ .../run_cityscapes_deterministic_eval.yaml | 36 +++++++++++++++++++ .../robust_segvit/run_cityscapes_gp_eval.yaml | 36 +++++++++++++++++++ .../run_cityscapes_het_eval.yaml | 36 +++++++++++++++++++ 4 files changed, 144 insertions(+) create mode 100755 experimental/robust_segvit/run_cityscapes_be_eval.yaml create mode 100755 experimental/robust_segvit/run_cityscapes_deterministic_eval.yaml create mode 100755 experimental/robust_segvit/run_cityscapes_gp_eval.yaml create mode 100755 experimental/robust_segvit/run_cityscapes_het_eval.yaml diff --git a/experimental/robust_segvit/run_cityscapes_be_eval.yaml b/experimental/robust_segvit/run_cityscapes_be_eval.yaml new file mode 100755 index 000000000..e617097a8 --- /dev/null +++ b/experimental/robust_segvit/run_cityscapes_be_eval.yaml @@ -0,0 +1,36 @@ +name: cityscapes_be_eval +program: deterministic.py +method: grid 
+project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 16 + config.store_logits: + value: false + + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/cityscapes/be_eval.py" + - "--output_dir" + - "gs://ub-ekb/segmenter/cityscapes/be_eval" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file diff --git a/experimental/robust_segvit/run_cityscapes_deterministic_eval.yaml b/experimental/robust_segvit/run_cityscapes_deterministic_eval.yaml new file mode 100755 index 000000000..03881ee1c --- /dev/null +++ b/experimental/robust_segvit/run_cityscapes_deterministic_eval.yaml @@ -0,0 +1,36 @@ +name: cityscapes_deterministic_eval +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 16 + config.store_logits: + value: false + + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/cityscapes/deterministic_eval.py" + - "--output_dir" + - "gs://ub-ekb/segmenter/cityscapes/deterministic_eval" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file diff --git a/experimental/robust_segvit/run_cityscapes_gp_eval.yaml b/experimental/robust_segvit/run_cityscapes_gp_eval.yaml new file mode 100755 index 000000000..516f38d79 --- /dev/null +++ b/experimental/robust_segvit/run_cityscapes_gp_eval.yaml @@ -0,0 +1,36 @@ +name: cityscapes_gp_eval +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : 
+ value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 16 + config.store_logits: + value: false + + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/cityscapes/gp_eval.py" + - "--output_dir" + - "gs://ub-ekb/segmenter/cityscapes/gp_eval" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file diff --git a/experimental/robust_segvit/run_cityscapes_het_eval.yaml b/experimental/robust_segvit/run_cityscapes_het_eval.yaml new file mode 100755 index 000000000..e7b4d1837 --- /dev/null +++ b/experimental/robust_segvit/run_cityscapes_het_eval.yaml @@ -0,0 +1,36 @@ +name: cityscapes_het_eval +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 16 + config.store_logits: + value: false + + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/cityscapes/het_eval.py" + - "--output_dir" + - "gs://ub-ekb/segmenter/cityscapes/het_eval" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file From 13069554ad7d40f5ba3908558ec79aa6581efa63 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 4 Oct 2022 08:56:17 -0400 Subject: [PATCH 101/150] fix syntax error in wandb yaml files --- experimental/robust_segvit/run_cityscapes_be_eval.yaml | 2 +- .../robust_segvit/run_cityscapes_deterministic_eval.yaml | 2 +- experimental/robust_segvit/run_cityscapes_gp_eval.yaml | 2 +- experimental/robust_segvit/run_cityscapes_het_eval.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/experimental/robust_segvit/run_cityscapes_be_eval.yaml b/experimental/robust_segvit/run_cityscapes_be_eval.yaml index e617097a8..b21482ce1 100755 --- 
a/experimental/robust_segvit/run_cityscapes_be_eval.yaml +++ b/experimental/robust_segvit/run_cityscapes_be_eval.yaml @@ -16,7 +16,7 @@ parameters: value: ${{entity}} config.batch_size: value: 16 - config.store_logits: + config.eval_configs.store_logits: value: false diff --git a/experimental/robust_segvit/run_cityscapes_deterministic_eval.yaml b/experimental/robust_segvit/run_cityscapes_deterministic_eval.yaml index 03881ee1c..6ee22e029 100755 --- a/experimental/robust_segvit/run_cityscapes_deterministic_eval.yaml +++ b/experimental/robust_segvit/run_cityscapes_deterministic_eval.yaml @@ -16,7 +16,7 @@ parameters: value: ${{entity}} config.batch_size: value: 16 - config.store_logits: + config.eval_configs.store_logits: value: false diff --git a/experimental/robust_segvit/run_cityscapes_gp_eval.yaml b/experimental/robust_segvit/run_cityscapes_gp_eval.yaml index 516f38d79..0d16f0a88 100755 --- a/experimental/robust_segvit/run_cityscapes_gp_eval.yaml +++ b/experimental/robust_segvit/run_cityscapes_gp_eval.yaml @@ -16,7 +16,7 @@ parameters: value: ${{entity}} config.batch_size: value: 16 - config.store_logits: + config.eval_configs.store_logits: value: false diff --git a/experimental/robust_segvit/run_cityscapes_het_eval.yaml b/experimental/robust_segvit/run_cityscapes_het_eval.yaml index e7b4d1837..595e178f5 100755 --- a/experimental/robust_segvit/run_cityscapes_het_eval.yaml +++ b/experimental/robust_segvit/run_cityscapes_het_eval.yaml @@ -16,7 +16,7 @@ parameters: value: ${{entity}} config.batch_size: value: 16 - config.store_logits: + config.eval_configs.store_logits: value: false From 69769c9743c31435f2ae34c1f643e4bbb62d5c44 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 4 Oct 2022 09:48:02 -0400 Subject: [PATCH 102/150] fix bug in name of checkpoint --- experimental/robust_segvit/configs/cityscapes/be_eval.py | 2 -- experimental/robust_segvit/configs/cityscapes/gp_eval.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git 
a/experimental/robust_segvit/configs/cityscapes/be_eval.py b/experimental/robust_segvit/configs/cityscapes/be_eval.py index 0415a17b4..912b4e47f 100644 --- a/experimental/robust_segvit/configs/cityscapes/be_eval.py +++ b/experimental/robust_segvit/configs/cityscapes/be_eval.py @@ -37,8 +37,6 @@ # Upstream CHECKPOINT_PATHS = { - ('ub', 'L', 16, None, 'token', '43838585-16'): - 'gs://ub-ekb/checkpoints_to_upload/cityscapes/43838585-16', ('ub', 'L', 16, None, 'token', '45338505-1'): 'gs://ub-checkpoints/45338505-cityscapes_segmenter_be/1', } diff --git a/experimental/robust_segvit/configs/cityscapes/gp_eval.py b/experimental/robust_segvit/configs/cityscapes/gp_eval.py index fa72060d2..db0438778 100644 --- a/experimental/robust_segvit/configs/cityscapes/gp_eval.py +++ b/experimental/robust_segvit/configs/cityscapes/gp_eval.py @@ -39,7 +39,7 @@ # Upstream CHECKPOINT_PATHS = { ('ub', 'L', 16, None, 'token', '45338722-1'): - 'gs://ub-checkpoints/ub-checkpoints/45338722-cityscapes_segmenter_gp_hyper/1', + 'gs://ub-checkpoints/45338722-cityscapes_segmenter_gp_hyper/1', } From 972a86a4cfde9fabdb90ede2089373a660bdbc16 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 4 Oct 2022 10:30:46 -0400 Subject: [PATCH 103/150] add eval config files for default (non-opt) parameter for different model --- .../configs/ade20k_ind/be_eval.py | 35 ++- .../configs/ade20k_ind/gp_eval.py | 254 ++++++++++++++++++ .../configs/ade20k_ind/het_eval.py | 40 ++- 3 files changed, 305 insertions(+), 24 deletions(-) create mode 100644 experimental/robust_segvit/configs/ade20k_ind/gp_eval.py diff --git a/experimental/robust_segvit/configs/ade20k_ind/be_eval.py b/experimental/robust_segvit/configs/ade20k_ind/be_eval.py index a392c7c8c..d07ab719d 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/be_eval.py +++ b/experimental/robust_segvit/configs/ade20k_ind/be_eval.py @@ -20,6 +20,8 @@ # pylint: enable=line-too-long import ml_collections +import datetime +import os 
_CITYSCAPES_FINE_TRAIN_SIZE = 2975 _CITYSCAPES_COARSE_TRAIN_SIZE = 19998 @@ -42,14 +44,14 @@ STRIDE = 16 RESNET_SIZE = None CLASSIFIER = 'token' -EXPERIMENTID = '43838358-2' +EXPERIMENTID = '45349725-1' target_size = (640, 640) # Upstream CHECKPOINT_PATHS = { - ('ub', 'L', 16, None, 'token', '43838358-2'): - 'gs://ub-ekb/checkpoints_to_upload/ade20k/43838358-2', + ('ub', 'L', 16, None, 'token', '45349725-1'): + 'gs://ub-checkpoints/45349725-ade20k_ind_segmenter_be/1', } @@ -169,8 +171,25 @@ def get_config(runlocal=''): config.eval_covariate_shift = True config.eval_robustness_configs = ml_collections.ConfigDict() config.eval_robustness_configs.auc_online = True - config.eval_robustness_configs.method_name = 'msp' - config.eval_robustness_configs.num_top_k = 5 + config.eval_robustness_configs.method_name = 'nmlogit' + config.eval_robustness_configs.num_top_k = 1 + + # Load checkpoint + config.checkpoint_configs = ml_collections.ConfigDict() + config.checkpoint_configs.checkpoint_format = CHECKPOINT_ORIGIN + config.checkpoint_configs.checkpoint_path = CHECKPOINT_PATH + config.checkpoint_configs.classifier = 'token' + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
if runlocal: config.count_flops = False @@ -181,12 +200,6 @@ def get_config(runlocal=''): config.dataset_configs.train_split = f'train[:{TRAIN_SAMPLES}]' config.dataset_configs.validation_split = f'validation[:{TRAIN_SAMPLES}]' config.num_train_examples = TRAIN_SAMPLES - else: - # Load checkpoint - config.checkpoint_configs = ml_collections.ConfigDict() - config.checkpoint_configs.checkpoint_format = CHECKPOINT_ORIGIN - config.checkpoint_configs.checkpoint_path = CHECKPOINT_PATH - config.checkpoint_configs.classifier = 'token' return config diff --git a/experimental/robust_segvit/configs/ade20k_ind/gp_eval.py b/experimental/robust_segvit/configs/ade20k_ind/gp_eval.py new file mode 100644 index 000000000..b8d1a5576 --- /dev/null +++ b/experimental/robust_segvit/configs/ade20k_ind/gp_eval.py @@ -0,0 +1,254 @@ +# coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Eval segmenter model on ade20k_ind. 
+ +""" +# pylint: enable=line-too-long + +import ml_collections +import os +import datetime + +_CITYSCAPES_FINE_TRAIN_SIZE = 2975 +_CITYSCAPES_COARSE_TRAIN_SIZE = 19998 + +_ADE20K_TRAIN_SIZE = 20210 +_PASCAL_VOC_TRAIN_SIZE = 10582 +_PASCAL_CONTEXT_TRAIN_SIZE = 4998 + +TRAIN_SIZES = { + 'cityscapes': _CITYSCAPES_FINE_TRAIN_SIZE, + 'ade20k': _ADE20K_TRAIN_SIZE, + 'ade20k_ind': _ADE20K_TRAIN_SIZE, + 'pascal_voc': _PASCAL_VOC_TRAIN_SIZE, + 'pascal_context': _PASCAL_CONTEXT_TRAIN_SIZE +} + +# Model specs. +VIT_SIZE = 'L' +STRIDE = 16 +RESNET_SIZE = None +CLASSIFIER = 'token' +target_size = (640, 640) + +CHECKPOINT_ORIGIN = 'ub' +EXPERIMENTID='45350699-1' +# Upstream +CHECKPOINT_PATHS = { + ('ub', 'L', 16, None, 'token', '45350699-1'): + 'gs://ub-checkpoints/45350699-ade20k_ind_segmenter_gp/1', +} + + +CHECKPOINT_PATH = CHECKPOINT_PATHS[(CHECKPOINT_ORIGIN, VIT_SIZE, STRIDE, + RESNET_SIZE, CLASSIFIER, EXPERIMENTID)] + + +if VIT_SIZE == 'B': + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 +elif VIT_SIZE == 'L': + mlp_dim = 4096 + num_heads = 16 + num_layers = 24 + hidden_size = 1024 + +TRAIN_SAMPLES = 32 + + +def get_config(runlocal=''): + """Returns the configuration for ADE20k_ind segmentation.""" + + runlocal = bool(runlocal) + + config = ml_collections.ConfigDict() + config.experiment_name = 'ade20k_ind_segmenter_gp_eval' + + # Dataset. + config.dataset_name = 'robust_segvit_segmentation' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 + + config.dataset_configs.train_split = 'train' + config.dataset_configs.name = 'ade20k_ind' + config.dataset_configs.dataset_name = '' # ood name flag to write in eval. + + # Model. 
+ config.model_name = 'segvit' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit' + config.model.backbone.mlp_dim = mlp_dim + config.model.backbone.num_heads = num_heads + config.model.backbone.num_layers = num_layers + config.model.backbone.hidden_size = hidden_size + config.model.backbone.dropout_rate = 0.0 + config.model.backbone.attention_dropout_rate = 0.0 + config.model.backbone.classifier = CLASSIFIER + + # Decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'gp' + + # GP layer params + config.model.decoder.gp_layer = ml_collections.ConfigDict() + config.model.decoder.gp_layer.covmat_kwargs = ml_collections.ConfigDict() + config.model.decoder.gp_layer.covmat_kwargs.ridge_penalty = 1. + # Disabling momentum would give the exact covariance update for finetuning; + # NOTE(review): it is left enabled (0.99) here -- confirm this is intended. + config.model.decoder.gp_layer.covmat_kwargs.momentum = 0.99 + config.model.decoder.mean_field_factor = 1. + # Additional params + config.model.decoder.gp_layer.normalize_input = True + config.model.decoder.gp_layer.hidden_kwargs = ml_collections.ConfigDict() + config.model.decoder.gp_layer.hidden_kwargs.feature_scale = 1. + + # Training. + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = ml_collections.FieldReference(100) + config.batch_size = 32 + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # Learning rate. 
+ config.num_train_examples = TRAIN_SIZES.get(config.dataset_configs.name) + config.steps_per_epoch = config.get_ref( + 'num_train_examples') // config.get_ref('batch_size') + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') + config.lr_configs.steps_per_cycle = config.get_ref( + 'num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 3e-5 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # Logging. + config.write_summary = True + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof. + config.checkpoint = True # Do checkpointing. + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = False # Debug mode during training. + config.debug_eval = False # Debug mode during eval. + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # Evaluation. 
+ config.eval_mode = True + config.eval_configs = ml_collections.ConfigDict() + config.eval_configs.mode = 'standard' + config.model.input_shape = target_size + + # Eval parameters for robustness + config.eval_label_shift = True + config.eval_covariate_shift = True + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'nmlogit' + config.eval_robustness_configs.num_top_k = 1 + + # Load checkpoint + config.checkpoint_configs = ml_collections.ConfigDict() + config.checkpoint_configs.checkpoint_format = CHECKPOINT_ORIGIN + config.checkpoint_configs.checkpoint_path = CHECKPOINT_PATH + config.checkpoint_configs.classifier = 'token' + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
+ + if runlocal: + config.count_flops = False + config.dataset_configs.train_target_size = (128, 128) + config.model.input_shape = config.dataset_configs.train_target_size + config.batch_size = 8 + config.num_training_epochs = 5 + config.warmup_steps = 0 + config.dataset_configs.train_split = f'train[:{TRAIN_SAMPLES}]' + config.dataset_configs.validation_split = f'validation[:{TRAIN_SAMPLES}]' + config.num_train_examples = TRAIN_SAMPLES + + return config + + +def checkpoint(hyper, backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task): + """Defines checkpoints for sweep.""" + overwrites = [] + if resnet_size is not None: + raise NotImplementedError('') + else: + overwrites.append( + hyper.sweep('config.model.patches', [{ + 'size': (stride, stride) + }])) + + if vit_size == 'B': + overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [3072])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [12])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [12])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [768])) + elif vit_size == 'L': + overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [4096])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [16])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [24])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [1024])) + else: + raise NotImplementedError('') + + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_format', + [backbone_origin])) + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_path', [ + CHECKPOINT_PATHS[(backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task)] + ])) + + return hyper.product(overwrites) + + +def get_sweep(hyper): + return hyper.product([]) + diff --git a/experimental/robust_segvit/configs/ade20k_ind/het_eval.py 
b/experimental/robust_segvit/configs/ade20k_ind/het_eval.py index bf3d08962..e7f3c1651 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/het_eval.py +++ b/experimental/robust_segvit/configs/ade20k_ind/het_eval.py @@ -22,6 +22,8 @@ # pylint: enable=line-too-long import ml_collections +import datetime +import os _CITYSCAPES_FINE_TRAIN_SIZE = 2975 _CITYSCAPES_COARSE_TRAIN_SIZE = 19998 @@ -44,14 +46,14 @@ STRIDE = 16 RESNET_SIZE = None CLASSIFIER = 'token' -EXPERIMENTID = '43838062-14' +EXPERIMENTID = '45350817-1' target_size = (640, 640) # Upstream CHECKPOINT_PATHS = { - ('ub', 'L', 16, None, 'token', '43838062-14'): - 'gs://ub-ekb/checkpoints_to_upload/ade20k/43838062-14', + ('ub', 'L', 16, None, 'token', '45350817-1'): + 'gs://ub-checkpoints/45350817-ade20k_ind_segmenter_het_hyper/1', } @@ -116,12 +118,12 @@ def get_config(runlocal=''): # Het layer params # temp: wide sweep [0.15, 0.3, 0.5, 0.75, 1.0, 1.5, 2.0] - config.model.decoder.temperature = 2.0 + config.model.decoder.temperature = 1.0 # efficient low rank approx ~ FxK where K is the classes. False for K<20. config.model.decoder.param_efficient = False # F as a low rank approx of KxK matrix has num_factors: # imagenet~15, jft~50, cifar~6, cityscapes~sweep(5-10). - config.model.decoder.num_factors = 10 + config.model.decoder.num_factors = 5 # mc_samples: use as much as can be afforded, ideally > 10. 
config.model.decoder.mc_samples = 1000 config.model.decoder.return_locs = False @@ -179,8 +181,26 @@ def get_config(runlocal=''): config.eval_covariate_shift = True config.eval_robustness_configs = ml_collections.ConfigDict() config.eval_robustness_configs.auc_online = True - config.eval_robustness_configs.method_name = 'msp' - config.eval_robustness_configs.num_top_k = 5 + config.eval_robustness_configs.method_name = 'nmlogit' + config.eval_robustness_configs.num_top_k = 1 + + # Load checkpoint + config.checkpoint_configs = ml_collections.ConfigDict() + config.checkpoint_configs.checkpoint_format = CHECKPOINT_ORIGIN + config.checkpoint_configs.checkpoint_path = CHECKPOINT_PATH + config.checkpoint_configs.classifier = 'token' + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
+ if runlocal: config.count_flops = False @@ -191,12 +211,6 @@ def get_config(runlocal=''): config.dataset_configs.train_split = f'train[:{TRAIN_SAMPLES}]' config.dataset_configs.validation_split = f'validation[:{TRAIN_SAMPLES}]' config.num_train_examples = TRAIN_SAMPLES - else: - # Load checkpoint - config.checkpoint_configs = ml_collections.ConfigDict() - config.checkpoint_configs.checkpoint_format = CHECKPOINT_ORIGIN - config.checkpoint_configs.checkpoint_path = CHECKPOINT_PATH - config.checkpoint_configs.classifier = 'token' return config From 9ee51816022f325a2fb59c3047b327bbc7066f66 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 4 Oct 2022 10:34:42 -0400 Subject: [PATCH 104/150] add wandb yaml files to call eval loaders --- .../robust_segvit/run_ade20k_ind_be_eval.yaml | 36 +++++++++++++++++++ .../robust_segvit/run_ade20k_ind_gp_eval.yaml | 36 +++++++++++++++++++ .../run_ade20k_ind_het_eval.yaml | 36 +++++++++++++++++++ 3 files changed, 108 insertions(+) create mode 100755 experimental/robust_segvit/run_ade20k_ind_be_eval.yaml create mode 100755 experimental/robust_segvit/run_ade20k_ind_gp_eval.yaml create mode 100755 experimental/robust_segvit/run_ade20k_ind_het_eval.yaml diff --git a/experimental/robust_segvit/run_ade20k_ind_be_eval.yaml b/experimental/robust_segvit/run_ade20k_ind_be_eval.yaml new file mode 100755 index 000000000..4a1d2ff97 --- /dev/null +++ b/experimental/robust_segvit/run_ade20k_ind_be_eval.yaml @@ -0,0 +1,36 @@ +name: ade20k_ind_be_eval +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 16 + config.eval_configs.store_logits: + value: false + + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/ade20k_ind/be_eval.py" + - "--output_dir" + - 
"gs://ub-ekb/segmenter/ade20k_ind/be_eval" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file diff --git a/experimental/robust_segvit/run_ade20k_ind_gp_eval.yaml b/experimental/robust_segvit/run_ade20k_ind_gp_eval.yaml new file mode 100755 index 000000000..61368503a --- /dev/null +++ b/experimental/robust_segvit/run_ade20k_ind_gp_eval.yaml @@ -0,0 +1,36 @@ +name: ade20k_ind_gp_eval +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 16 + config.eval_configs.store_logits: + value: false + + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/ade20k_ind/gp_eval.py" + - "--output_dir" + - "gs://ub-ekb/segmenter/ade20k_ind/gp_eval" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file diff --git a/experimental/robust_segvit/run_ade20k_ind_het_eval.yaml b/experimental/robust_segvit/run_ade20k_ind_het_eval.yaml new file mode 100755 index 000000000..40b997d6e --- /dev/null +++ b/experimental/robust_segvit/run_ade20k_ind_het_eval.yaml @@ -0,0 +1,36 @@ +name: ade20k_ind_het_eval +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 16 + config.eval_configs.store_logits: + value: false + + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/ade20k_ind/het_eval.py" + - "--output_dir" + - "gs://ub-ekb/segmenter/ade20k_ind/het_eval" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file From c2469a512645f2a05b4718bea17ea357b150e177 Mon Sep 17 
00:00:00 2001 From: Kelly Buchanan Date: Tue, 4 Oct 2022 10:37:24 -0400 Subject: [PATCH 105/150] add store_logits flag to eval config files --- experimental/robust_segvit/configs/ade20k_ind/be_eval.py | 1 + experimental/robust_segvit/configs/ade20k_ind/gp_eval.py | 1 + experimental/robust_segvit/configs/ade20k_ind/het_eval.py | 1 + 3 files changed, 3 insertions(+) diff --git a/experimental/robust_segvit/configs/ade20k_ind/be_eval.py b/experimental/robust_segvit/configs/ade20k_ind/be_eval.py index d07ab719d..df0aa2d3a 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/be_eval.py +++ b/experimental/robust_segvit/configs/ade20k_ind/be_eval.py @@ -165,6 +165,7 @@ def get_config(runlocal=''): config.eval_configs = ml_collections.ConfigDict() config.eval_configs.mode = 'standard' config.model.input_shape = target_size + config.eval_configs.store_logits = False # Eval parameters for robustness config.eval_label_shift = True diff --git a/experimental/robust_segvit/configs/ade20k_ind/gp_eval.py b/experimental/robust_segvit/configs/ade20k_ind/gp_eval.py index b8d1a5576..f505258d7 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/gp_eval.py +++ b/experimental/robust_segvit/configs/ade20k_ind/gp_eval.py @@ -172,6 +172,7 @@ def get_config(runlocal=''): config.eval_configs = ml_collections.ConfigDict() config.eval_configs.mode = 'standard' config.model.input_shape = target_size + config.eval_configs.store_logits = False # Eval parameters for robustness config.eval_label_shift = True diff --git a/experimental/robust_segvit/configs/ade20k_ind/het_eval.py b/experimental/robust_segvit/configs/ade20k_ind/het_eval.py index e7f3c1651..a8385762d 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/het_eval.py +++ b/experimental/robust_segvit/configs/ade20k_ind/het_eval.py @@ -175,6 +175,7 @@ def get_config(runlocal=''): config.eval_configs = ml_collections.ConfigDict() config.eval_configs.mode = 'standard' config.model.input_shape = target_size + 
config.eval_configs.store_logits = False # Eval parameters for robustness config.eval_label_shift = True From 9e817a6ab274700f81fa325de3957239d149c49a Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 4 Oct 2022 11:33:19 -0400 Subject: [PATCH 106/150] add wandb yaml files to store logits --- .../store_cityscapes_be_eval.yaml | 37 +++++++++++++++++++ .../store_cityscapes_deterministic_eval.yaml | 37 +++++++++++++++++++ .../store_cityscapes_gp_eval.yaml | 37 +++++++++++++++++++ .../store_cityscapes_het_eval.yaml | 37 +++++++++++++++++++ 4 files changed, 148 insertions(+) create mode 100755 experimental/robust_segvit/store_cityscapes_be_eval.yaml create mode 100755 experimental/robust_segvit/store_cityscapes_deterministic_eval.yaml create mode 100755 experimental/robust_segvit/store_cityscapes_gp_eval.yaml create mode 100755 experimental/robust_segvit/store_cityscapes_het_eval.yaml diff --git a/experimental/robust_segvit/store_cityscapes_be_eval.yaml b/experimental/robust_segvit/store_cityscapes_be_eval.yaml new file mode 100755 index 000000000..16d6baa9f --- /dev/null +++ b/experimental/robust_segvit/store_cityscapes_be_eval.yaml @@ -0,0 +1,37 @@ +name: cityscapes_be_eval +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 16 + config.eval_configs.store_logits: + value: true + config.eval_covariate_shift: + value: false + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/cityscapes/be_eval.py" + - "--output_dir" + - "cityscapes/be_eval" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file diff --git a/experimental/robust_segvit/store_cityscapes_deterministic_eval.yaml b/experimental/robust_segvit/store_cityscapes_deterministic_eval.yaml new file mode 100755 
index 000000000..f57045b45 --- /dev/null +++ b/experimental/robust_segvit/store_cityscapes_deterministic_eval.yaml @@ -0,0 +1,37 @@ +name: cityscapes_deterministic_eval +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 16 + config.eval_configs.store_logits: + value: true + config.eval_covariate_shift: + value: false + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/cityscapes/deterministic_eval.py" + - "--output_dir" + - "cityscapes/deterministic_eval" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file diff --git a/experimental/robust_segvit/store_cityscapes_gp_eval.yaml b/experimental/robust_segvit/store_cityscapes_gp_eval.yaml new file mode 100755 index 000000000..87b82c3cb --- /dev/null +++ b/experimental/robust_segvit/store_cityscapes_gp_eval.yaml @@ -0,0 +1,37 @@ +name: cityscapes_gp_eval +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 16 + config.eval_configs.store_logits: + value: true + config.eval_covariate_shift: + value: false + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/cityscapes/gp_eval.py" + - "--output_dir" + - "cityscapes/gp_eval" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file diff --git a/experimental/robust_segvit/store_cityscapes_het_eval.yaml b/experimental/robust_segvit/store_cityscapes_het_eval.yaml new file mode 100755 index 000000000..ba6eadaff --- /dev/null +++ 
b/experimental/robust_segvit/store_cityscapes_het_eval.yaml @@ -0,0 +1,37 @@ +name: cityscapes_het_eval +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 16 + config.eval_configs.store_logits: + value: true + config.eval_covariate_shift: + value: false + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/cityscapes/het_eval.py" + - "--output_dir" + - "cityscapes/het_eval" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file From 5d75b1117c1dc77a71eeaad662edd9e51a161d6f Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 4 Oct 2022 11:55:06 -0400 Subject: [PATCH 107/150] update name of default directory --- experimental/robust_segvit/custom_segmentation_trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index bbf288610..96fd925bb 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -168,7 +168,7 @@ def evaluate(train_state: train_utils.TrainState, store_logits = config.eval_configs.get('store_logits', False) if store_logits: - store_logits_fname = os.path.join(workdir, prefix, "logits", "val.h5py") + store_logits_fname = os.path.join(workdir, "{}_{}_val.h5py".format(prefix,"logits")) f = h5py.File(store_logits_fname, 'w', libver='latest') f.swmr_mode = True # single write multi-read input_shape = dataset.meta_data['input_shape'][1:3] @@ -283,7 +283,7 @@ def evaluate_ood( store_logits = config.eval_configs.get('store_logits', False) if store_logits: - store_logits_fname = os.path.join(workdir, prefix, "logits", "val.h5py") + 
store_logits_fname = os.path.join(workdir, "{}_{}_val.h5py".format(prefix,"logits")) f = h5py.File(store_logits_fname, 'w', libver='latest') f.swmr_mode = True # single write multi-read input_shape = dataset.meta_data['input_shape'][1:3] From eba4cd535e805a65aba4aa5677e6d67e32c27666 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 4 Oct 2022 12:06:14 -0400 Subject: [PATCH 108/150] when computing ood metrics skip images wo any ood pixels or images where all pixels are ood --- .../robust_segvit/custom_segmentation_trainer.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index 96fd925bb..0e79c506b 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -322,9 +322,14 @@ def evaluate_ood( ood_score = get_ood_score(e_logits, **kwargs) - auc_roc.update_state( + # skip images where all the pixels are ood or there are no ood pixels + all_pixel_ood = jnp.sum(e_batch['label'] * e_batch['batch_mask']) == 1 + no_pixel_ood = jnp.sum(e_batch['label'] * e_batch['batch_mask']) == 0 + + if not (all_pixel_ood) and not (no_pixel_ood): + auc_roc.update_state( e_batch['label'], ood_score, sample_weight=e_batch['batch_mask']) - auc_pr.update_state( + auc_pr.update_state( e_batch['label'], ood_score, sample_weight=e_batch['batch_mask']) if store_logits: From ea02a0267f0f05b2085c9cea796bf9ed8730abdf Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 4 Oct 2022 12:07:32 -0400 Subject: [PATCH 109/150] add wandb yaml config where we use 1-msp as ood score --- experimental/robust_segvit/run_cityscapes_be_eval.yaml | 4 ++++ .../robust_segvit/run_cityscapes_deterministic_eval.yaml | 5 ++++- experimental/robust_segvit/run_cityscapes_gp_eval.yaml | 4 ++++ experimental/robust_segvit/run_cityscapes_het_eval.yaml | 4 ++++ 4 files changed, 16 insertions(+), 1 
deletion(-) diff --git a/experimental/robust_segvit/run_cityscapes_be_eval.yaml b/experimental/robust_segvit/run_cityscapes_be_eval.yaml index b21482ce1..b0afa8ebc 100755 --- a/experimental/robust_segvit/run_cityscapes_be_eval.yaml +++ b/experimental/robust_segvit/run_cityscapes_be_eval.yaml @@ -18,6 +18,10 @@ parameters: value: 16 config.eval_configs.store_logits: value: false + config.eval_covariate_shift: + value: false + config.eval_robustness_configs.method_name: + value: 'msp' diff --git a/experimental/robust_segvit/run_cityscapes_deterministic_eval.yaml b/experimental/robust_segvit/run_cityscapes_deterministic_eval.yaml index 6ee22e029..b2eced3cc 100755 --- a/experimental/robust_segvit/run_cityscapes_deterministic_eval.yaml +++ b/experimental/robust_segvit/run_cityscapes_deterministic_eval.yaml @@ -18,7 +18,10 @@ parameters: value: 16 config.eval_configs.store_logits: value: false - + config.eval_covariate_shift: + value: false + config.eval_robustness_configs.method_name: + value: 'msp' command: diff --git a/experimental/robust_segvit/run_cityscapes_gp_eval.yaml b/experimental/robust_segvit/run_cityscapes_gp_eval.yaml index 0d16f0a88..ff44994d1 100755 --- a/experimental/robust_segvit/run_cityscapes_gp_eval.yaml +++ b/experimental/robust_segvit/run_cityscapes_gp_eval.yaml @@ -18,6 +18,10 @@ parameters: value: 16 config.eval_configs.store_logits: value: false + config.eval_covariate_shift: + value: false + config.eval_robustness_configs.method_name: + value: 'msp' diff --git a/experimental/robust_segvit/run_cityscapes_het_eval.yaml b/experimental/robust_segvit/run_cityscapes_het_eval.yaml index 595e178f5..671486f60 100755 --- a/experimental/robust_segvit/run_cityscapes_het_eval.yaml +++ b/experimental/robust_segvit/run_cityscapes_het_eval.yaml @@ -18,6 +18,10 @@ parameters: value: 16 config.eval_configs.store_logits: value: false + config.eval_covariate_shift: + value: false + config.eval_robustness_configs.method_name: + value: 'msp' From 
2fd0bcde62ff93c50ab1c65b8d9454623a996283 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 4 Oct 2022 12:42:32 -0400 Subject: [PATCH 110/150] add wand yaml files with ood_score=msp --- experimental/robust_segvit/run_ade20k_ind_be_eval.yaml | 5 ++++- experimental/robust_segvit/run_ade20k_ind_gp_eval.yaml | 4 ++++ experimental/robust_segvit/run_ade20k_ind_het_eval.yaml | 5 ++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/experimental/robust_segvit/run_ade20k_ind_be_eval.yaml b/experimental/robust_segvit/run_ade20k_ind_be_eval.yaml index 4a1d2ff97..90863139c 100755 --- a/experimental/robust_segvit/run_ade20k_ind_be_eval.yaml +++ b/experimental/robust_segvit/run_ade20k_ind_be_eval.yaml @@ -18,7 +18,10 @@ parameters: value: 16 config.eval_configs.store_logits: value: false - + config.eval_covariate_shift: + value: false + config.eval_robustness_configs.method_name: + value: 'msp' command: diff --git a/experimental/robust_segvit/run_ade20k_ind_gp_eval.yaml b/experimental/robust_segvit/run_ade20k_ind_gp_eval.yaml index 61368503a..0612dcdf1 100755 --- a/experimental/robust_segvit/run_ade20k_ind_gp_eval.yaml +++ b/experimental/robust_segvit/run_ade20k_ind_gp_eval.yaml @@ -18,6 +18,10 @@ parameters: value: 16 config.eval_configs.store_logits: value: false + config.eval_covariate_shift: + value: false + config.eval_robustness_configs.method_name: + value: 'msp' diff --git a/experimental/robust_segvit/run_ade20k_ind_het_eval.yaml b/experimental/robust_segvit/run_ade20k_ind_het_eval.yaml index 40b997d6e..0ed843f4b 100755 --- a/experimental/robust_segvit/run_ade20k_ind_het_eval.yaml +++ b/experimental/robust_segvit/run_ade20k_ind_het_eval.yaml @@ -18,7 +18,10 @@ parameters: value: 16 config.eval_configs.store_logits: value: false - + config.eval_covariate_shift: + value: false + config.eval_robustness_configs.method_name: + value: 'msp' command: From 19a204ddd6e51f5f5ca80215cb82400517665478 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 4 Oct 
2022 15:26:44 -0400 Subject: [PATCH 111/150] update test for multihost metrics to exclude images where all pixels are ood or there are no ood pixels --- experimental/robust_segvit/metrics_multihost.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/experimental/robust_segvit/metrics_multihost.py b/experimental/robust_segvit/metrics_multihost.py index 95b884961..e12d2b4cd 100644 --- a/experimental/robust_segvit/metrics_multihost.py +++ b/experimental/robust_segvit/metrics_multihost.py @@ -87,7 +87,13 @@ def __init__(self, curve, num_thresholds=200): def calculate_and_update_scores(self, logits, label, sample_weight, *kwargs): ood_score = get_ood_score(logits, *kwargs) - self.auc.update_state(label, ood_score, sample_weight=sample_weight) + + # skip images where all the pixels are ood or there are no ood pixels + all_pixel_ood = jnp.sum(label*sample_weight) == 1 + no_pixel_ood = jnp.sum(label*sample_weight) == 0 + + if not(all_pixel_ood) and not(no_pixel_ood): + self.auc.update_state(label, ood_score, sample_weight=sample_weight) def gather_metrics(self): auc_state = keras_auc_to_arrays(self.auc) From c9cf75676cbd80bdc699926aa1f49f86b0796c93 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 4 Oct 2022 15:27:23 -0400 Subject: [PATCH 112/150] add wandb yaml file to evaluate deterministic ade20k model --- .../configs/ade20k_ind/deterministic_eval.py | 243 ++++++++++++++++++ .../run_ade20k_ind_deterministic_eval.yaml | 39 +++ 2 files changed, 282 insertions(+) create mode 100644 experimental/robust_segvit/configs/ade20k_ind/deterministic_eval.py create mode 100755 experimental/robust_segvit/run_ade20k_ind_deterministic_eval.yaml diff --git a/experimental/robust_segvit/configs/ade20k_ind/deterministic_eval.py b/experimental/robust_segvit/configs/ade20k_ind/deterministic_eval.py new file mode 100644 index 000000000..e5f8ff97e --- /dev/null +++ b/experimental/robust_segvit/configs/ade20k_ind/deterministic_eval.py @@ -0,0 +1,243 @@ +# 
coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Eval segmenter model on ade20k_ind. + + +""" +# pylint: enable=line-too-long + +import ml_collections +import os +import datetime + +_CITYSCAPES_FINE_TRAIN_SIZE = 2975 +_CITYSCAPES_COARSE_TRAIN_SIZE = 19998 + +_ADE20K_TRAIN_SIZE = 20210 +_PASCAL_VOC_TRAIN_SIZE = 10582 +_PASCAL_CONTEXT_TRAIN_SIZE = 4998 + +TRAIN_SIZES = { + 'cityscapes': _CITYSCAPES_FINE_TRAIN_SIZE, + 'ade20k': _ADE20K_TRAIN_SIZE, + 'ade20k_ind': _ADE20K_TRAIN_SIZE, + 'pascal_voc': _PASCAL_VOC_TRAIN_SIZE, + 'pascal_context': _PASCAL_CONTEXT_TRAIN_SIZE +} + +# Model specs. 
+VIT_SIZE = 'L' +STRIDE = 16 +RESNET_SIZE = None +CLASSIFIER = 'token' +target_size = (640, 640) + +CHECKPOINT_ORIGIN = 'ub' +EXPERIMENTID = '45373386-1' + +# Upstream +CHECKPOINT_PATHS = { + ('ub', 'L', 16, None, 'token', '45373386-1'): + 'gs://ub-checkpoints/45373386-ade20k_ind_deterministic/1', +} + + +CHECKPOINT_PATH = CHECKPOINT_PATHS[(CHECKPOINT_ORIGIN, VIT_SIZE, STRIDE, + RESNET_SIZE, CLASSIFIER, EXPERIMENTID)] + +if VIT_SIZE == 'B': + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 +elif VIT_SIZE == 'L': + mlp_dim = 4096 + num_heads = 16 + num_layers = 24 + hidden_size = 1024 + +TRAIN_SAMPLES = 32 + + +def get_config(runlocal=''): + """Returns the configuration for ADE20k_ind segmentation.""" + + runlocal = bool(runlocal) + + config = ml_collections.ConfigDict() + config.experiment_name = 'ade20k_ind_deterministic_eval' + + # Dataset. + config.dataset_name = 'robust_segvit_segmentation' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 + + config.dataset_configs.train_split = 'train' + config.dataset_configs.name = 'ade20k_ind' + config.dataset_configs.dataset_name = '' # ood name flag to write in eval. + + # Model. 
+ config.model_name = 'segvit' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit' + config.model.backbone.mlp_dim = mlp_dim + config.model.backbone.num_heads = num_heads + config.model.backbone.num_layers = num_layers + config.model.backbone.hidden_size = hidden_size + config.model.backbone.dropout_rate = 0.0 + config.model.backbone.attention_dropout_rate = 0.0 + config.model.backbone.classifier = CLASSIFIER + + # Decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'linear' + + # Training. + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = ml_collections.FieldReference(100) + config.batch_size = 32 + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # Learning rate. + config.num_train_examples = TRAIN_SIZES.get(config.dataset_configs.name) + config.steps_per_epoch = config.get_ref( + 'num_train_examples') // config.get_ref('batch_size') + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') + config.lr_configs.steps_per_cycle = config.get_ref( + 'num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # Logging. 
+ config.write_summary = True + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof. + config.checkpoint = True # Do checkpointing. + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = False # Debug mode during training. + config.debug_eval = False # Debug mode during eval. + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # Evaluation. + config.eval_mode = True + config.eval_configs = ml_collections.ConfigDict() + config.eval_configs.mode = 'standard' + config.model.input_shape = target_size + config.eval_configs.store_logits = False + + # Eval parameters for robustness + config.eval_label_shift = True + config.eval_covariate_shift = True + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'nmlogit' + config.eval_robustness_configs.num_top_k = 1 + + # Load checkpoint + config.checkpoint_configs = ml_collections.ConfigDict() + config.checkpoint_configs.checkpoint_format = CHECKPOINT_ORIGIN + config.checkpoint_configs.checkpoint_path = CHECKPOINT_PATH + config.checkpoint_configs.classifier = 'token' + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. 
+ + if runlocal: + config.count_flops = False + config.dataset_configs.train_target_size = (128, 128) + config.model.input_shape = config.dataset_configs.train_target_size + config.batch_size = 8 + config.num_training_epochs = 5 + config.warmup_steps = 0 + config.dataset_configs.train_split = f'train[:{TRAIN_SAMPLES}]' + config.dataset_configs.validation_split = f'validation[:{TRAIN_SAMPLES}]' + config.num_train_examples = TRAIN_SAMPLES + + return config + + +def checkpoint(hyper, backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task): + """Defines checkpoints for sweep.""" + overwrites = [] + if resnet_size is not None: + raise NotImplementedError('') + else: + overwrites.append( + hyper.sweep('config.model.patches', [{ + 'size': (stride, stride) + }])) + + if vit_size == 'B': + overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [3072])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [12])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [12])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [768])) + elif vit_size == 'L': + overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [4096])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [16])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [24])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [1024])) + else: + raise NotImplementedError('') + + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_format', + [backbone_origin])) + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_path', [ + CHECKPOINT_PATHS[(backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task)] + ])) + + return hyper.product(overwrites) + + +def get_sweep(hyper): + return hyper.product([]) + diff --git a/experimental/robust_segvit/run_ade20k_ind_deterministic_eval.yaml 
b/experimental/robust_segvit/run_ade20k_ind_deterministic_eval.yaml new file mode 100755 index 000000000..01347a6a9 --- /dev/null +++ b/experimental/robust_segvit/run_ade20k_ind_deterministic_eval.yaml @@ -0,0 +1,39 @@ +name: ade20k_ind_deterministic_eval +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 16 + config.eval_configs.store_logits: + value: false + config.eval_covariate_shift: + value: true + config.eval_robustness_configs.method_name: + value: 'msp' + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/ade20k_ind/deterministic_eval.py" + - "--output_dir" + - "gs://ub-ekb/segmenter/ade20k_ind/deterministic_eval" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file From c012056bcd7715b6f873cb898afc287b75006050 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 2 Nov 2022 14:42:04 -0400 Subject: [PATCH 113/150] clean up readme --- experimental/robust_segvit/README.md | 39 ++++++++++------------- experimental/robust_segvit/run_toy_mac.sh | 31 +++++++++++++----- 2 files changed, 40 insertions(+), 30 deletions(-) diff --git a/experimental/robust_segvit/README.md b/experimental/robust_segvit/README.md index 90e7ada02..711197e46 100644 --- a/experimental/robust_segvit/README.md +++ b/experimental/robust_segvit/README.md @@ -1,31 +1,26 @@ # Robust segvit -*Robust_segvit* is a codebase to evaluate the robustness of semantic segmentation models. +**Robust_segvit** is a codebase to evaluate the robustness of semantic segmentation models.
+The code is built on top of [uncertainty_baselines](https://github.com/google/uncertainty-baselines) and [Scenic](https://github.com/google-research/scenic). -Robust_segvit is developed in [JAX](https://github.com/google/jax) and uses [Flax](https://github.com/google/flax), [uncertainty_baselines](https://github.com/google/uncertainty-baselines) and [Scenic](https://github.com/google-research/scenic). +## Installation +Robust_segvit is developed in [JAX](https://github.com/google/jax)/[Flax](https://github.com/google/flax). -## Code structure -See uncertainty_baselines/google/experimental/cityscapes. +To run the code:
+1. Install [uncertainty_baselines](https://github.com/google/uncertainty-baselines).
+2. Install [Scenic](https://github.com/google-research/scenic).
+3. Follow the instructions for a toy run in [./run_deterministic_mac.sh](). +## Datasets +The experiment configurations for the different datasets are in: -## Cityscapes +[x] configs/cityscapes: Cityscapes dataset.
+[x] configs/ade20k_ind: ADE20k_ind dataset.
+[x] configs/street_hazards: Street Hazards dataset.
-We investigate the performance of different reliability methods on image segmentation tasks.
+## Comments: +[x] The checkpoint used for finetuning is the same the original segmenter model: [vit_large_patch16_384](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py) -[x] configs/cityscapes: contains experiment configurations for the cityscapes dataset.
+## Citing work: - -## Debugging: - -To run the code on cpu, install the dependencies as in: -[x] Copy ananconda environment -[x] Install jaxlib, jax, flax from source -[x] Install scenic from source -[x] Install uncertainty_baselines from source - -## Issues -[] Fails to read segmenter_be model. - -## Comments -[x] Update default checkpoint: vit_large_patch16_384 (segmenter model uses this checkpoint) - from https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py \ No newline at end of file +If you reference this code, please cite [our paper](https://github.com/google/uncertainty-baselines).
\ No newline at end of file diff --git a/experimental/robust_segvit/run_toy_mac.sh b/experimental/robust_segvit/run_toy_mac.sh index b4a79ae65..88e64e937 100755 --- a/experimental/robust_segvit/run_toy_mac.sh +++ b/experimental/robust_segvit/run_toy_mac.sh @@ -1,16 +1,26 @@ #!/bin/bash -# train toy model using wandb -#wandb sweep run_toy_mac.yaml -# before make sure we can run code vanilla version: +# ---------------------------------------------------- +# train toy model on a DATASET: +# ---------------------------------------------------- -DATASET='ade20k_ind' # or cityscapes +# to train toy model and track performance using wandb: +# wandb sweep run_toy_mac.yaml + +DATASET='ade20k_ind' DATASET='cityscapes' DATASET='street_hazards' +# ---------------------------------------------------- +# Set directory where outputs should be installed: +# ---------------------------------------------------- base_output_dir="gs://ub-ekb/segmenter/${DATASET}/toy_model" - -# Debug on Mac OS X platform +run_name="toy_model" +output_dir="${base_output_dir}/${run_name}" +# ---------------------------------------------------- +# Set device configuration for Mac OS X platform +# or TPU v2-8/v3-8 frameworks. +# ---------------------------------------------------- use_gpu=False if [ "$(uname)" = "Darwin" ] ; then tpu=False @@ -22,9 +32,14 @@ num_cores=8 batch_size=8 fi +# ---------------------------------------------------- +# Set configuration file +# ---------------------------------------------------- config_file="configs/${DATASET}/toy_model.py:runlocal" -run_name="toy_model" -output_dir="${base_output_dir}/${run_name}" + +# ---------------------------------------------------- +# Call model trainer. 
+# ---------------------------------------------------- python deterministic.py \ --output_dir=${output_dir} \ --num_cores=$num_cores \ From bd6b60d47181dc7795a467f0f83966afd58ba449 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 2 Nov 2022 14:43:42 -0400 Subject: [PATCH 114/150] clean readme --- experimental/robust_segvit/README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/experimental/robust_segvit/README.md b/experimental/robust_segvit/README.md index 711197e46..9e0218e5d 100644 --- a/experimental/robust_segvit/README.md +++ b/experimental/robust_segvit/README.md @@ -1,7 +1,6 @@ # Robust segvit -**Robust_segvit** is a codebase to evaluate the robustness of semantic segmentation models.
-The code is built on top of [uncertainty_baselines](https://github.com/google/uncertainty-baselines) and [Scenic](https://github.com/google-research/scenic). +**Robust_segvit** is a codebase to evaluate the robustness of semantic segmentation models. The code is built on top of [uncertainty_baselines](https://github.com/google/uncertainty-baselines) and [Scenic](https://github.com/google-research/scenic). ## Installation Robust_segvit is developed in [JAX](https://github.com/google/jax)/[Flax](https://github.com/google/flax). @@ -14,12 +13,12 @@ To run the code:
## Datasets The experiment configurations for the different datasets are in: -[x] configs/cityscapes: Cityscapes dataset.
-[x] configs/ade20k_ind: ADE20k_ind dataset.
-[x] configs/street_hazards: Street Hazards dataset.
+- configs/cityscapes: Cityscapes dataset.
+- configs/ade20k_ind: ADE20k_ind dataset.
+- configs/street_hazards: Street Hazards dataset.
## Comments: -[x] The checkpoint used for finetuning is the same the original segmenter model: [vit_large_patch16_384](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py) +- The checkpoint used for finetuning is the same the original segmenter model: [vit_large_patch16_384](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py) ## Citing work: From 28a65320b88f2157718b271481c36c009706f6bd Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 2 Nov 2022 22:52:13 -0400 Subject: [PATCH 115/150] add multihost ece calculation --- .../custom_segmentation_trainer.py | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index 0e79c506b..14cd30bd6 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -53,6 +53,8 @@ import os import resource import sys +import copy +import robustness_metrics as rm Batch = Dict[str, jnp.ndarray] MetricFn = Callable[[jnp.ndarray, Dict[str, jnp.ndarray]], @@ -64,6 +66,14 @@ PyTree = Any +def host_all_gather_metrics(metric): + states = multihost_utils.process_allgather(metric.get_weights()) + state = jax.tree_util.tree_map(lambda x: np.sum(x, axis=0), states) + metric_copy = copy.deepcopy(metric) + metric_copy.set_weights(state) + return metric_copy + + def to_cpu(x, all_gather=False): """Send x to cpu. @@ -164,6 +174,9 @@ def evaluate(train_state: train_utils.TrainState, # Evaluate global metrics on one of the hosts (lead_host), but given # intermediate values collected from all hosts. 
+ # start ece metric + ece_metric = rm.metrics.ExpectedCalibrationError(num_bins=10)._metric + # store logits store_logits = config.eval_configs.get('store_logits', False) @@ -183,6 +196,13 @@ def evaluate(train_state: train_utils.TrainState, e_batch, e_logits, e_metrics, confusion_matrix, unc_confusion_matrix = eval_step_pmapped( train_state=train_state, batch=eval_batch) eval_metrics.append(train_utils.unreplicate_and_get(e_metrics)) + + probs = jax.nn.softmax(e_logits, axis=-1) + + # TODO(kellybuchanan): add masking to ece metric in rm. + # updates on each host separately + ece_metric.update_state(e_batch['label'], probs, sample_weight=e_batch['batch_mask']) + if lead_host and global_metrics_fn is not None: # Collect data to be sent for computing global metrics. eval_all_confusion_mats.append(to_cpu(confusion_matrix, all_gather=True)) @@ -194,7 +214,7 @@ def evaluate(train_state: train_utils.TrainState, end_idx = start_idx + config.batch_size logits_out[start_idx:end_idx] = e_logits inputs_out[start_idx:end_idx] = e_batch['inputs'] - labels_out[start_idx:end_idx] = e_batch['labels'] + labels_out[start_idx:end_idx] = e_batch['label'] if store_logits: f.close() @@ -218,6 +238,11 @@ def evaluate(train_state: train_utils.TrainState, prefix=prefix, ) + # Gather ece from all hosts and write value: + ece_metric = host_all_gather_metrics(ece_metric) + ece = ece_metric.result() + writer.write_scalars(step=step, scalars={'{}_ece'.format(prefix) : ece} ) + # Visualize val predictions for one batch: if lead_host: # in eval_step we do not use all_gather in batch or logits @@ -327,6 +352,7 @@ def evaluate_ood( no_pixel_ood = jnp.sum(e_batch['label'] * e_batch['batch_mask']) == 0 if not (all_pixel_ood) and not (no_pixel_ood): + # sample weight 1 for values t include and 0 for values to exclude auc_roc.update_state( e_batch['label'], ood_score, sample_weight=e_batch['batch_mask']) auc_pr.update_state( @@ -1189,6 +1215,7 @@ def eval_ckpt( # 
---------------------------------------------------------------------------- # Evaluate OOD datasets + logging.info('Evaluating OOD datasets') eval_summary_ood = evaluate_ood_step( train_state=train_state, config=config, From 8078ead64b3b3e1afd33503d6fd583ce997296ee Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 2 Nov 2022 22:52:53 -0400 Subject: [PATCH 116/150] update script file to run toy experiments (run_toy_mac.sh) --- experimental/robust_segvit/run_toy_mac.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/experimental/robust_segvit/run_toy_mac.sh b/experimental/robust_segvit/run_toy_mac.sh index 88e64e937..c8ec065eb 100755 --- a/experimental/robust_segvit/run_toy_mac.sh +++ b/experimental/robust_segvit/run_toy_mac.sh @@ -14,9 +14,14 @@ DATASET='street_hazards' # ---------------------------------------------------- # Set directory where outputs should be installed: # ---------------------------------------------------- -base_output_dir="gs://ub-ekb/segmenter/${DATASET}/toy_model" +# can write results directly to gcp bucket +# base_output_dir="gs://ub-ekb/segmenter/${DATASET}/toy_model" +dt=$(date +"%Y-%m-%d-%H-%M-%S") + +base_output_dir="results/${DATASET}" + run_name="toy_model" -output_dir="${base_output_dir}/${run_name}" +output_dir="${base_output_dir}/${run_name}/${dt}" # ---------------------------------------------------- # Set device configuration for Mac OS X platform # or TPU v2-8/v3-8 frameworks. 
From ccd3cf108146f83e6ed3eb43578b0d6b7499b4fa Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 2 Nov 2022 22:53:51 -0400 Subject: [PATCH 117/150] add config file and wandb yaml file to call experiments --- .../street_hazards/deterministic_eval.py | 247 ++++++++++++++++++ ...run_street_hazards_deterministic_eval.yaml | 39 +++ 2 files changed, 286 insertions(+) create mode 100644 experimental/robust_segvit/configs/street_hazards/deterministic_eval.py create mode 100755 experimental/robust_segvit/run_street_hazards_deterministic_eval.yaml diff --git a/experimental/robust_segvit/configs/street_hazards/deterministic_eval.py b/experimental/robust_segvit/configs/street_hazards/deterministic_eval.py new file mode 100644 index 000000000..4d2c15128 --- /dev/null +++ b/experimental/robust_segvit/configs/street_hazards/deterministic_eval.py @@ -0,0 +1,247 @@ +# coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Evaluate segmenter model on street_hazards. 
+ + +""" +# pylint: enable=line-too-long + +import ml_collections +import os +import datetime + +_CITYSCAPES_FINE_TRAIN_SIZE = 2975 +_CITYSCAPES_COARSE_TRAIN_SIZE = 19998 + +_ADE20K_TRAIN_SIZE = 20210 +_PASCAL_VOC_TRAIN_SIZE = 10582 +_PASCAL_CONTEXT_TRAIN_SIZE = 4998 +_STREET_HAZARDS_TRAIN_SIZE = 5125 + +TRAIN_SIZES = { + 'cityscapes': _CITYSCAPES_FINE_TRAIN_SIZE, + 'ade20k': _ADE20K_TRAIN_SIZE, + 'ade20k_ind': _ADE20K_TRAIN_SIZE, + 'pascal_voc': _PASCAL_VOC_TRAIN_SIZE, + 'pascal_context': _PASCAL_CONTEXT_TRAIN_SIZE, + 'street_hazards': _STREET_HAZARDS_TRAIN_SIZE + +} + +# Model specs. +target_size = (720, 720) + +LOAD_PRETRAINED_BACKBONE = True +VIT_SIZE = 'L' +STRIDE = 16 +RESNET_SIZE = None +CLASSIFIER = 'token' +UPSTREAM_TASK = 'augreg+i21k+imagenet2012' +target_size = (720, 720) + +CHECKPOINT_ORIGIN = 'ub' +EXPERIMENTID='det_run1' + +# Upstream +MODEL_PATHS = { + ('ub', 'L', 16, None, 'token', 'det_run1'): + 'gs://ub-ekb/segmenter/street_hazards/deterministic/deterministic_2022-09-27-07-32-08', +} + + +MODEL_PATH = MODEL_PATHS[(CHECKPOINT_ORIGIN, VIT_SIZE, STRIDE, + RESNET_SIZE, CLASSIFIER, EXPERIMENTID)] + +if VIT_SIZE == 'B': + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 +elif VIT_SIZE == 'L': + mlp_dim = 4096 + num_heads = 16 + num_layers = 24 + hidden_size = 1024 + +TRAIN_SAMPLES = 32 + + +def get_config(runlocal=''): + """Returns the configuration for ADE20k_ind segmentation.""" + + runlocal = bool(runlocal) + + config = ml_collections.ConfigDict() + config.experiment_name = 'street_hazards_deterministic_eval' + + # Dataset. 
+ config.dataset_name = 'robust_segvit_segmentation' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 + + config.dataset_configs.train_split = 'train' + config.dataset_configs.name = 'street_hazards' + config.dataset_configs.dataset_name = '' # ood name flag to write in eval. + + # Model. + config.model_name = 'segvit' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit' + config.model.backbone.mlp_dim = mlp_dim + config.model.backbone.num_heads = num_heads + config.model.backbone.num_layers = num_layers + config.model.backbone.hidden_size = hidden_size + config.model.backbone.dropout_rate = 0.1 + config.model.backbone.attention_dropout_rate = 0.0 + config.model.backbone.classifier = CLASSIFIER + + # Decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'linear' + + # Training. + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = ml_collections.FieldReference(100) + config.batch_size = 32 + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # Learning rate. + config.num_train_examples = TRAIN_SIZES.get(config.dataset_configs.name) + config.steps_per_epoch = config.get_ref( + 'num_train_examples') // config.get_ref('batch_size') + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') + config.lr_configs.steps_per_cycle = config.get_ref( + 'num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # Load checkpoint + config.checkpoint_configs = ml_collections.ConfigDict() + config.checkpoint_configs.checkpoint_format = CHECKPOINT_ORIGIN + config.checkpoint_configs.checkpoint_path = MODEL_PATH + config.checkpoint_configs.classifier = 'token' + + # Logging. + config.write_summary = True + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof. + config.checkpoint = True # Do checkpointing. + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = False # Debug mode during training. + config.debug_eval = False # Debug mode during eval. + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # Evaluation. + config.eval_configs = ml_collections.ConfigDict() + config.eval_configs.mode = 'standard' + config.eval_mode = True + config.eval_covariate_shift = True + config.eval_label_shift = True + config.model.input_shape = target_size + config.eval_configs.store_logits = False + + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. 
+ config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. + + if runlocal: + config.count_flops = False + config.dataset_configs.train_target_size = (128, 128) + config.model.input_shape = config.dataset_configs.train_target_size + config.batch_size = 8 + config.num_training_epochs = 5 + config.warmup_steps = 0 + config.dataset_configs.train_split = f'train[:{TRAIN_SAMPLES}]' + config.dataset_configs.validation_split = f'validation[:{TRAIN_SAMPLES}]' + config.num_train_examples = TRAIN_SAMPLES + + return config + + +def checkpoint(hyper, backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task): + """Defines checkpoints for sweep.""" + overwrites = [] + if resnet_size is not None: + raise NotImplementedError('') + else: + overwrites.append( + hyper.sweep('config.model.patches', [{ + 'size': (stride, stride) + }])) + + if vit_size == 'B': + overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [3072])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [12])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [12])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [768])) + elif vit_size == 'L': + overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [4096])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [16])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [24])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [1024])) + else: + raise NotImplementedError('') + + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_format', + [backbone_origin])) + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_path', [ + MODEL_PATHS[(backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task)] + ])) + + 
return hyper.product(overwrites) + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/robust_segvit/run_street_hazards_deterministic_eval.yaml b/experimental/robust_segvit/run_street_hazards_deterministic_eval.yaml new file mode 100755 index 000000000..194dce1e1 --- /dev/null +++ b/experimental/robust_segvit/run_street_hazards_deterministic_eval.yaml @@ -0,0 +1,39 @@ +name: street_hazards_deterministic_eval +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 16 + config.eval_configs.store_logits: + value: false + config.eval_covariate_shift: + value: false + config.eval_robustness_configs.method_name: + value: 'msp' + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/street_hazards/deterministic_eval.py" + - "--output_dir" + - "gs://ub-ekb/segmenter/street_hazards/deterministic_eval" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file From efa4a66cde9e75ccfc5a81c5c7aefb7ff5a7a142 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 2 Nov 2022 23:06:02 -0400 Subject: [PATCH 118/150] update batch size to fix oom issues --- .../robust_segvit/run_street_hazards_deterministic_eval.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/robust_segvit/run_street_hazards_deterministic_eval.yaml b/experimental/robust_segvit/run_street_hazards_deterministic_eval.yaml index 194dce1e1..78e7d7ee0 100755 --- a/experimental/robust_segvit/run_street_hazards_deterministic_eval.yaml +++ b/experimental/robust_segvit/run_street_hazards_deterministic_eval.yaml @@ -15,7 +15,7 @@ parameters: config.wandb_entity : value: ${{entity}} config.batch_size: - value: 16 + value: 8 config.eval_configs.store_logits: value: false 
config.eval_covariate_shift: From 71716bf2102d2d6e7ba3ea2d46a185623534f7cf Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Thu, 3 Nov 2022 01:20:30 -0400 Subject: [PATCH 119/150] (1) add comments to the multihost class and update trainer to use this class --- .../custom_segmentation_trainer.py | 110 +++--------------- .../robust_segvit/metrics_multihost.py | 41 ++++++- 2 files changed, 53 insertions(+), 98 deletions(-) diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index 14cd30bd6..f47120b9c 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -44,7 +44,6 @@ from ensemble_utils import log_average_softmax_probs # local file import from experimental.robust_segvit from inference import process_batch # local file import from experimental.robust_segvit from ood_metrics import get_ood_metrics # local file import from experimental.robust_segvit -from ood_metrics import get_ood_score # local file import from experimental.robust_segvit from pretrainer_utils import convert_torch_to_jax_checkpoint # local file import from experimental.robust_segvit from pretrainer_utils import convert_vision_transformer_to_scenic # local file import from experimental.robust_segvit from uncertainty_metrics import get_uncertainty_confusion_matrix # local file import from experimental.robust_segvit @@ -53,8 +52,9 @@ import os import resource import sys -import copy import robustness_metrics as rm +from metrics_multihost import ComputeOODAUCMetric +from metrics_multihost import host_all_gather_metrics Batch = Dict[str, jnp.ndarray] MetricFn = Callable[[jnp.ndarray, Dict[str, jnp.ndarray]], @@ -66,14 +66,6 @@ PyTree = Any -def host_all_gather_metrics(metric): - states = multihost_utils.process_allgather(metric.get_weights()) - state = jax.tree_util.tree_map(lambda x: np.sum(x, axis=0), states) - metric_copy = 
copy.deepcopy(metric) - metric_copy.set_weights(state) - return metric_copy - - def to_cpu(x, all_gather=False): """Send x to cpu. @@ -240,8 +232,7 @@ def evaluate(train_state: train_utils.TrainState, # Gather ece from all hosts and write value: ece_metric = host_all_gather_metrics(ece_metric) - ece = ece_metric.result() - writer.write_scalars(step=step, scalars={'{}_ece'.format(prefix) : ece} ) + writer.write_scalars(step=step, scalars={'{}_ece'.format(prefix) : ece_metric.result()} ) # Visualize val predictions for one batch: if lead_host: @@ -321,11 +312,10 @@ def evaluate_ood( if auc_online: # TODO(kellybuchanan): check split of data across devices. # initialize metrics: ideally in each device in each host/process/machine - # keras initializes one metric in each host because it runs in cpu - # so we need to convert to jax to run metrics in each device in each host - - auc_pr = tf.keras.metrics.AUC(curve='PR') - auc_roc = tf.keras.metrics.AUC(curve='ROC') + # keras initializes one metric in each host because it runs in cpu. + # so we need to convert the function to run metrics in each device/host. 
+ auc_pr = ComputeOODAUCMetric(curve='PR', num_thresholds=100) + auc_roc = ComputeOODAUCMetric(curve='ROC', num_thresholds=100) # Loop through each machine: for step_ in range(steps_per_eval): @@ -333,7 +323,6 @@ def evaluate_ood( e_batch, e_logits = eval_step_pmapped( train_state=train_state, batch=eval_batch) - if store_logits: start_idx = step_ * config.batch_size end_idx = start_idx + config.batch_size @@ -341,82 +330,18 @@ def evaluate_ood( inputs_out[start_idx:end_idx] = e_batch['inputs'] labels_out[start_idx:end_idx] = e_batch['labels'] - - # In eval_step_pmapped we have not used all gather, so each metric is in - # each device and we should be able to compute devices separately - - ood_score = get_ood_score(e_logits, **kwargs) - - # skip images where all the pixels are ood or there are no ood pixels - all_pixel_ood = jnp.sum(e_batch['label'] * e_batch['batch_mask']) == 1 - no_pixel_ood = jnp.sum(e_batch['label'] * e_batch['batch_mask']) == 0 - - if not (all_pixel_ood) and not (no_pixel_ood): - # sample weight 1 for values t include and 0 for values to exclude - auc_roc.update_state( - e_batch['label'], ood_score, sample_weight=e_batch['batch_mask']) - auc_pr.update_state( - e_batch['label'], ood_score, sample_weight=e_batch['batch_mask']) + # In eval_step_pmapped we have not used all gather, so each metric is in each device + # and we should be able to compute metrics in devices separately. + auc_pr.calculate_and_update_scores(logits=e_logits, label=e_batch['label'], + sample_weight=e_batch['batch_mask'], **kwargs) + auc_roc.calculate_and_update_scores(logits=e_logits, label=e_batch['label'], + sample_weight=e_batch['batch_mask'], **kwargs) if store_logits: f.close() - # How to communicate metrics across hosts? - # Ideally we can collect auc_metrics per host, merge them, compute result. - # However, we cannot pass arbitraty class. 
- # jax which doesn't work with arbitrary objects - # Here we write a custom merge_state as in tf.keras.metrics - # by pulling states from tf.keras obj, combining them and putting them back - # into a keras object using list of host's auc_roc objects. - - def keras_auc_to_arrays(keras_auc_object): - """Pull out arrays from keras roc object.""" - # The thresholds used are determinisitc, so we need not store them. - tp = jnp.asarray(keras_auc_object.true_positives) - fp = jnp.asarray(keras_auc_object.false_positives) - tn = jnp.asarray(keras_auc_object.true_negatives) - fn = jnp.asarray(keras_auc_object.false_negatives) - return tp, fp, tn, fn - - def arrays_to_keras_auc(tp, fp, tn, fn, keras_auc_object): - """Assign confusion matrix arrays to a keras_auc_object.""" - keras_auc_object.true_positives.assign(tp) - keras_auc_object.false_positives.assign(fp) - keras_auc_object.true_negatives.assign(tn) - keras_auc_object.false_negatives.assign(fn) - return keras_auc_object - - auc_roc_state = keras_auc_to_arrays(auc_roc) - auc_pr_state = keras_auc_to_arrays(auc_pr) - - def combine_states(all_auc_states): - # jax can take in trees of arrays, tuple is considered a tree so we can - # unpack it here. - # each array here has dimensions #host x shape - - all_tp, all_fp, all_tn, all_fn = all_auc_states - - assert all_tp.shape == (jax.process_count(), 200) - assert all_fp.shape == (jax.process_count(), 200) - assert all_tn.shape == (jax.process_count(), 200) - assert all_fn.shape == (jax.process_count(), 200) - - tp = jnp.sum(all_tp, 0) - fp = jnp.sum(all_fp, 0) - tn = jnp.sum(all_tn, 0) - fn = jnp.sum(all_fn, 0) - - return tp, fp, tn, fn - - # Gather the data across all hosts. - all_auc_roc_states = multihost_utils.process_allgather(auc_roc_state) - all_auc_pr_states = multihost_utils.process_allgather(auc_pr_state) - - # Below we pick the first device. 
- auc_roc = arrays_to_keras_auc(*combine_states(all_auc_roc_states), auc_roc) - auc_pr = arrays_to_keras_auc(*combine_states(all_auc_pr_states), auc_pr) - - eval_summary = {'auroc': float(auc_roc.result().numpy()), - 'auprc': float(auc_pr.result().numpy()), + + eval_summary = {'auroc': float(auc_roc.gather_metrics()), + 'auprc': float(auc_pr.gather_metrics()), } else: @@ -430,7 +355,7 @@ def combine_states(all_auc_states): e_batch, e_logits = eval_step_pmapped( train_state=train_state, batch=eval_batch) - # Store all logits in cpu + # Store all logits in cpu: if lead_host: e_batch = to_cpu(e_batch, all_gather=False) e_logits = to_cpu(e_logits, all_gather=False) @@ -449,6 +374,7 @@ def combine_states(all_auc_states): ood_mask=eval_ood_labels, weights=eval_ood_masks, **kwargs) + ############### LOG EVAL SUMMARY ############### writer.write_scalars( step, { diff --git a/experimental/robust_segvit/metrics_multihost.py b/experimental/robust_segvit/metrics_multihost.py index e12d2b4cd..5a81b5716 100644 --- a/experimental/robust_segvit/metrics_multihost.py +++ b/experimental/robust_segvit/metrics_multihost.py @@ -1,4 +1,12 @@ -"""Calculate ood metrics across hosts.""" +"""Calculate ood metrics across hosts. + +# How to communicate metrics across hosts? +# Ideally we can collect auc_metrics per host, merge them, compute result. +# However, we cannot pass arbitraty class. +# jax which doesn't work with arbitrary objects. + + +""" from typing import Any, Optional, Dict import jax @@ -7,8 +15,13 @@ from jax.experimental import multihost_utils from ood_metrics import get_ood_score from ood_metrics import get_score +import numpy as np +import copy +# Here we write a custom merge_state as in tf.keras.metrics +# by pulling states from tf.keras obj, combining them and putting them back +# into a keras object using list of host's auc_roc objects. 
def keras_auc_to_arrays(keras_auc_object): """Pull out arrays from keras roc object.""" # The thresholds used are determinisitc, so we need not store them. @@ -48,6 +61,14 @@ def combine_states(all_auc_states, num_thresholds=200): return tp, fp, tn, fn +def host_all_gather_metrics(metric): + states = multihost_utils.process_allgather(metric.get_weights()) + state = jax.tree_util.tree_map(lambda x: np.sum(x, axis=0), states) + metric_copy = copy.deepcopy(metric) + metric_copy.set_weights(state) + return metric_copy + + class ComputeAUCMetric: """Calculate auc metrics across multiple hosts.""" def __init__(self, curve, num_thresholds=200, from_logits=False): @@ -76,7 +97,15 @@ def gather_metrics(self): class ComputeOODAUCMetric: - """Calculate auc metrics across multiple hosts.""" + """Calculate auc metrics across multiple hosts. + + Args: + curve: 'ROC' or 'PR' for the type of AUC. + num_thresholds: Number of thresholds to use for discretizing the roc curve. + from_logits: Whether `y_pred` is expected to be a logits tensor. If it is a logits tensor, + a sigmoid function is applied to the logits. 
+ + """ def __init__(self, curve, num_thresholds=200): self.curve = curve self.num_thresholds = num_thresholds @@ -85,8 +114,8 @@ def __init__(self, curve, num_thresholds=200): from_logits=self.from_logits, num_thresholds=self.num_thresholds) - def calculate_and_update_scores(self, logits, label, sample_weight, *kwargs): - ood_score = get_ood_score(logits, *kwargs) + def calculate_and_update_scores(self, logits, label, sample_weight, **kwargs): + ood_score = get_ood_score(logits, **kwargs) # skip images where all the pixels are ood or there are no ood pixels all_pixel_ood = jnp.sum(label*sample_weight) == 1 @@ -124,8 +153,8 @@ def __init__(self, curve, num_thresholds=200, summation_method='interpolation',t thresholds=self.thresholds) def calculate_and_update_scores(self, logits, label, sample_weight, **kwargs): - " label 1 for ood pixel and 0 is otherwise " - conf = - 1 * get_score(logits=logits, **kwargs) + " label 1 for ood pixel and 0 is otherwise." + conf = - 1 * get_score(logits, **kwargs) # skip images where all the pixels are ood or there are no ood pixels all_pixel_ood = jnp.sum(label*sample_weight) == 1 From 74caf1e6dc05c00b7837f844af93d59bb389c083 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Thu, 3 Nov 2022 01:34:15 -0400 Subject: [PATCH 120/150] merge checkpoint code --- .../robust_segvit/checkpoint_utils.py | 71 +++++++++++++++++++ .../custom_segmentation_trainer.py | 60 ++-------------- 2 files changed, 75 insertions(+), 56 deletions(-) create mode 100644 experimental/robust_segvit/checkpoint_utils.py diff --git a/experimental/robust_segvit/checkpoint_utils.py b/experimental/robust_segvit/checkpoint_utils.py new file mode 100644 index 000000000..eeb59d054 --- /dev/null +++ b/experimental/robust_segvit/checkpoint_utils.py @@ -0,0 +1,71 @@ +# load checkpoints +from scenic.train_lib_deprecated import train_utils +from pretrainer_utils import convert_torch_to_jax_checkpoint # local file import from experimental.robust_segvit +from 
scenic.train_lib_deprecated import pretrain_utils +from pretrainer_utils import convert_vision_transformer_to_scenic # local file import from experimental.robust_segvit + + +def load_checkpoints_eval(config, model, train_state, workdir): + checkpoint_configs = config.get('checkpoint_configs', False) + if checkpoint_configs: + # Load torch weights + if 'torch' in checkpoint_configs.checkpoint_format: + + bb_train_state = convert_torch_to_jax_checkpoint( + checkpoint_path=checkpoint_configs.checkpoint_path, + config=checkpoint_configs) + + train_state = model.init_backbone_from_train_state( + train_state, + bb_train_state, + config, + checkpoint_configs + ) + del bb_train_state + + # Load weights in checkpoint_path or workdir + else: + checkpoint_path = checkpoint_configs.get('checkpoint_path', workdir) + train_state, _ = train_utils.restore_checkpoint( + checkpoint_path, train_state) + return train_state + + +def load_checkpoints_backbone(config, model, train_state, workdir): + del workdir + # TODO(kellybuchanan): check out partial loader in + # https://github.com/google/uncertainty-baselines/commit/083b1dcc52bb1964f8917d15552ece8848d582ae# + restored_model_cfg = config.get('pretrained_backbone_configs') + + # Load pretrained backbone + if restored_model_cfg.checkpoint_format in ('ub', 'big_vision', 'scenic'): + # load params from checkpoint + bb_train_state = pretrain_utils.convert_big_vision_to_scenic_checkpoint( + checkpoint_path=restored_model_cfg.checkpoint_path, + convert_to_linen=False) + + train_state = model.init_backbone_from_train_state( + train_state, + bb_train_state, + config, + restored_model_cfg, + model_prefix_path=['backbone']) + # Free unnecessary memory. 
+ del bb_train_state + # Loader from scenic + elif restored_model_cfg.checkpoint_format in ('vision_transformer'): + # load params from checkpoint + bb_train_state = convert_vision_transformer_to_scenic(checkpoint_path=restored_model_cfg.checkpoint_path, convert_to_linen=False) + + train_state = model.init_backbone_from_train_state( + train_state, + bb_train_state, + config, + restored_model_cfg, + model_prefix_path=['backbone']) + + # Free unnecessary memory. + del bb_train_state + else: + raise NotImplementedError('') + return train_state diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index f47120b9c..b45544e1e 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -47,7 +47,8 @@ from pretrainer_utils import convert_torch_to_jax_checkpoint # local file import from experimental.robust_segvit from pretrainer_utils import convert_vision_transformer_to_scenic # local file import from experimental.robust_segvit from uncertainty_metrics import get_uncertainty_confusion_matrix # local file import from experimental.robust_segvit - +from checkpoint_utils import load_checkpoints_eval +from checkpoint_utils import load_checkpoints_backbone import h5py import os import resource @@ -756,41 +757,7 @@ def train( # Load pretrained backbone if start_step == 0 and config.get('load_pretrained_backbone', False): - # TODO(kellybuchanan): check out partial loader in - # https://github.com/google/uncertainty-baselines/commit/083b1dcc52bb1964f8917d15552ece8848d582ae# - restored_model_cfg = config.get('pretrained_backbone_configs') - - # Loader from scenic - if restored_model_cfg.checkpoint_format in ('ub', 'big_vision', 'scenic'): - # load params from checkpoint - bb_train_state = pretrain_utils.convert_big_vision_to_scenic_checkpoint( - checkpoint_path=restored_model_cfg.checkpoint_path, - convert_to_linen=False) - - 
train_state = model.init_backbone_from_train_state( - train_state, - bb_train_state, - config, - restored_model_cfg, - model_prefix_path=['backbone']) - # Free unnecessary memory. - del bb_train_state - # Loader from scenic - elif restored_model_cfg.checkpoint_format in ('vision_transformer'): - # load params from checkpoint - bb_train_state = convert_vision_transformer_to_scenic(checkpoint_path=restored_model_cfg.checkpoint_path, convert_to_linen=False) - - train_state = model.init_backbone_from_train_state( - train_state, - bb_train_state, - config, - restored_model_cfg, - model_prefix_path=['backbone']) - - # Free unnecessary memory. - del bb_train_state - else: - raise NotImplementedError('') + train_state = load_checkpoints_backbone(config, model, train_state, workdir) elif start_step == 0: logging.info('Not restoring from any pretrained_backbone.') @@ -1076,26 +1043,7 @@ def eval_ckpt( checkpoint_configs = config.get('checkpoint_configs', False) if checkpoint_configs: - # Load torch weights - if 'torch' in checkpoint_configs.checkpoint_format: - - bb_train_state = convert_torch_to_jax_checkpoint( - checkpoint_path=checkpoint_configs.checkpoint_path, - config=checkpoint_configs) - - train_state = model.init_backbone_from_train_state( - train_state, - bb_train_state, - config, - checkpoint_configs - ) - del bb_train_state - - # Load weights in checkpoint_path or workdir - else: - checkpoint_path = checkpoint_configs.get('checkpoint_path', workdir) - train_state, _ = train_utils.restore_checkpoint( - checkpoint_path, train_state) + train_state = load_checkpoints_eval(config, model, train_state, workdir) else: logging.info('Not loading any checkpoints') From 68f1239aa1208e828849a83cb050bd30934430f6 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Thu, 3 Nov 2022 01:35:39 -0400 Subject: [PATCH 121/150] use (-1) as a scaling factor for ood --- experimental/robust_segvit/custom_segmentation_trainer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) 
diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index b45544e1e..accb87aa4 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -54,7 +54,7 @@ import resource import sys import robustness_metrics as rm -from metrics_multihost import ComputeOODAUCMetric +from metrics_multihost import ComputeOODAUCMetric, ComputeScoreAUCMetric from metrics_multihost import host_all_gather_metrics Batch = Dict[str, jnp.ndarray] @@ -315,8 +315,8 @@ def evaluate_ood( # initialize metrics: ideally in each device in each host/process/machine # keras initializes one metric in each host because it runs in cpu. # so we need to convert the function to run metrics in each device/host. - auc_pr = ComputeOODAUCMetric(curve='PR', num_thresholds=100) - auc_roc = ComputeOODAUCMetric(curve='ROC', num_thresholds=100) + auc_pr = ComputeScoreAUCMetric(curve='PR', num_thresholds=100) + auc_roc = ComputeScoreAUCMetric(curve='ROC', num_thresholds=100) # Loop through each machine: for step_ in range(steps_per_eval): From 7aa4efd0fbc7eb279e7f4e3b98be459ee988d743 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Thu, 3 Nov 2022 01:51:29 -0400 Subject: [PATCH 122/150] use ood score without (-1) factor --- experimental/robust_segvit/custom_segmentation_trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index accb87aa4..9f44e1e94 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -315,8 +315,8 @@ def evaluate_ood( # initialize metrics: ideally in each device in each host/process/machine # keras initializes one metric in each host because it runs in cpu. 
# so we need to convert the function to run metrics in each device/host. - auc_pr = ComputeScoreAUCMetric(curve='PR', num_thresholds=100) - auc_roc = ComputeScoreAUCMetric(curve='ROC', num_thresholds=100) + auc_pr = ComputeOODAUCMetric(curve='PR', num_thresholds=100) + auc_roc = ComputeOODAUCMetric(curve='ROC', num_thresholds=100) # Loop through each machine: for step_ in range(steps_per_eval): From e20c435a15bf8ec781095fd4ddedf4724642a376 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Thu, 3 Nov 2022 01:56:20 -0400 Subject: [PATCH 123/150] remove dropout factor in deterministic_eval --- .../robust_segvit/configs/street_hazards/deterministic_eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/robust_segvit/configs/street_hazards/deterministic_eval.py b/experimental/robust_segvit/configs/street_hazards/deterministic_eval.py index 4d2c15128..17e4493a5 100644 --- a/experimental/robust_segvit/configs/street_hazards/deterministic_eval.py +++ b/experimental/robust_segvit/configs/street_hazards/deterministic_eval.py @@ -114,7 +114,7 @@ def get_config(runlocal=''): config.model.backbone.num_heads = num_heads config.model.backbone.num_layers = num_layers config.model.backbone.hidden_size = hidden_size - config.model.backbone.dropout_rate = 0.1 + config.model.backbone.dropout_rate = 0.0 config.model.backbone.attention_dropout_rate = 0.0 config.model.backbone.classifier = CLASSIFIER From 748e4b0e0c3d96b885d68cc5362bf67da5983f03 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Thu, 3 Nov 2022 01:58:16 -0400 Subject: [PATCH 124/150] add models to evaluate the performance of the gp model --- .../configs/street_hazards/gp_eval.py | 260 ++++++++++++++++++ ...azards.yaml => run_street_hazards_gp.yaml} | 0 .../run_street_hazards_gp_eval.yaml | 39 +++ 3 files changed, 299 insertions(+) create mode 100644 experimental/robust_segvit/configs/street_hazards/gp_eval.py rename experimental/robust_segvit/{run_gp_street_hazards.yaml => 
run_street_hazards_gp.yaml} (100%) create mode 100755 experimental/robust_segvit/run_street_hazards_gp_eval.yaml diff --git a/experimental/robust_segvit/configs/street_hazards/gp_eval.py b/experimental/robust_segvit/configs/street_hazards/gp_eval.py new file mode 100644 index 000000000..6ab27e8f5 --- /dev/null +++ b/experimental/robust_segvit/configs/street_hazards/gp_eval.py @@ -0,0 +1,260 @@ +# coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Evaluate segmenter_gp model on street_hazards. + + +""" +# pylint: enable=line-too-long + +import ml_collections +import os +import datetime + +_CITYSCAPES_FINE_TRAIN_SIZE = 2975 +_CITYSCAPES_COARSE_TRAIN_SIZE = 19998 + +_ADE20K_TRAIN_SIZE = 20210 +_PASCAL_VOC_TRAIN_SIZE = 10582 +_PASCAL_CONTEXT_TRAIN_SIZE = 4998 +_STREET_HAZARDS_TRAIN_SIZE = 5125 + +TRAIN_SIZES = { + 'cityscapes': _CITYSCAPES_FINE_TRAIN_SIZE, + 'ade20k': _ADE20K_TRAIN_SIZE, + 'ade20k_ind': _ADE20K_TRAIN_SIZE, + 'pascal_voc': _PASCAL_VOC_TRAIN_SIZE, + 'pascal_context': _PASCAL_CONTEXT_TRAIN_SIZE, + 'street_hazards': _STREET_HAZARDS_TRAIN_SIZE + +} + +# Model specs. 
+target_size = (720, 720) + +LOAD_PRETRAINED_BACKBONE = True +VIT_SIZE = 'L' +STRIDE = 16 +RESNET_SIZE = None +CLASSIFIER = 'token' +UPSTREAM_TASK = 'augreg+i21k+imagenet2012' +target_size = (720, 720) + +CHECKPOINT_ORIGIN = 'ub' +EXPERIMENTID='gp_run1' + +# Upstream +MODEL_PATHS = { + ('ub', 'L', 16, None, 'token', 'gp_run1'): + 'gs://ub-ekb/segmenter/street_hazards/gp/gp_2022-10-03-15-05-54', +} + + +MODEL_PATH = MODEL_PATHS[(CHECKPOINT_ORIGIN, VIT_SIZE, STRIDE, + RESNET_SIZE, CLASSIFIER, EXPERIMENTID)] + +if VIT_SIZE == 'B': + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 +elif VIT_SIZE == 'L': + mlp_dim = 4096 + num_heads = 16 + num_layers = 24 + hidden_size = 1024 + +TRAIN_SAMPLES = 32 + + +def get_config(runlocal=''): + """Returns the configuration for ADE20k_ind segmentation.""" + + runlocal = bool(runlocal) + + config = ml_collections.ConfigDict() + config.experiment_name = 'street_hazards_gp_eval' + + # Dataset. + config.dataset_name = 'robust_segvit_segmentation' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 + + config.dataset_configs.train_split = 'train' + config.dataset_configs.name = 'street_hazards' + config.dataset_configs.dataset_name = '' # ood name flag to write in eval. + + # Model. 
+ config.model_name = 'segvit' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit' + config.model.backbone.mlp_dim = mlp_dim + config.model.backbone.num_heads = num_heads + config.model.backbone.num_layers = num_layers + config.model.backbone.hidden_size = hidden_size + config.model.backbone.dropout_rate = 0.0 + config.model.backbone.attention_dropout_rate = 0.0 + config.model.backbone.classifier = CLASSIFIER + + # Decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'gp' + + # GP layer params + config.model.decoder.gp_layer = ml_collections.ConfigDict() + config.model.decoder.gp_layer.covmat_kwargs = ml_collections.ConfigDict() + config.model.decoder.gp_layer.covmat_kwargs.ridge_penalty = 1. + # Disable momentum in order to use exact covariance update for finetuning. + # Disable to allow exact cov update. + config.model.decoder.gp_layer.covmat_kwargs.momentum = 0.99 + config.model.decoder.mean_field_factor = 1. + # Additional params + config.model.decoder.gp_layer.normalize_input = True + config.model.decoder.gp_layer.hidden_kwargs = ml_collections.ConfigDict() + config.model.decoder.gp_layer.hidden_kwargs.feature_scale = 1. + + # Training. + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = ml_collections.FieldReference(100) + config.batch_size = 32 + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # Learning rate. 
+ config.num_train_examples = TRAIN_SIZES.get(config.dataset_configs.name) + config.steps_per_epoch = config.get_ref( + 'num_train_examples') // config.get_ref('batch_size') + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') + config.lr_configs.steps_per_cycle = config.get_ref( + 'num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # Load checkpoint + config.checkpoint_configs = ml_collections.ConfigDict() + config.checkpoint_configs.checkpoint_format = CHECKPOINT_ORIGIN + config.checkpoint_configs.checkpoint_path = MODEL_PATH + config.checkpoint_configs.classifier = 'token' + + # Logging. + config.write_summary = True + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof. + config.checkpoint = True # Do checkpointing. + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = False # Debug mode during training. + config.debug_eval = False # Debug mode during eval. + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # Evaluation. + config.eval_configs = ml_collections.ConfigDict() + config.eval_configs.mode = 'standard' + config.eval_mode = True + config.eval_covariate_shift = True + config.eval_label_shift = True + config.model.input_shape = target_size + config.eval_configs.store_logits = False + + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. 
+ config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. + + if runlocal: + config.count_flops = False + config.dataset_configs.train_target_size = (128, 128) + config.model.input_shape = config.dataset_configs.train_target_size + config.batch_size = 8 + config.num_training_epochs = 5 + config.warmup_steps = 0 + config.dataset_configs.train_split = f'train[:{TRAIN_SAMPLES}]' + config.dataset_configs.validation_split = f'validation[:{TRAIN_SAMPLES}]' + config.num_train_examples = TRAIN_SAMPLES + + return config + + +def checkpoint(hyper, backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task): + """Defines checkpoints for sweep.""" + overwrites = [] + if resnet_size is not None: + raise NotImplementedError('') + else: + overwrites.append( + hyper.sweep('config.model.patches', [{ + 'size': (stride, stride) + }])) + + if vit_size == 'B': + overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [3072])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [12])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [12])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [768])) + elif vit_size == 'L': + overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [4096])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [16])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [24])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [1024])) + else: + raise NotImplementedError('') + + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_format', + [backbone_origin])) + 
overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_path', [ + MODEL_PATHS[(backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task)] + ])) + + return hyper.product(overwrites) + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/robust_segvit/run_gp_street_hazards.yaml b/experimental/robust_segvit/run_street_hazards_gp.yaml similarity index 100% rename from experimental/robust_segvit/run_gp_street_hazards.yaml rename to experimental/robust_segvit/run_street_hazards_gp.yaml diff --git a/experimental/robust_segvit/run_street_hazards_gp_eval.yaml b/experimental/robust_segvit/run_street_hazards_gp_eval.yaml new file mode 100755 index 000000000..988f79c2b --- /dev/null +++ b/experimental/robust_segvit/run_street_hazards_gp_eval.yaml @@ -0,0 +1,39 @@ +name: street_hazards_gp_eval +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 8 + config.eval_configs.store_logits: + value: false + config.eval_covariate_shift: + value: false + config.eval_robustness_configs.method_name: + value: 'msp' + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/street_hazards/gp_eval.py" + - "--output_dir" + - "gs://ub-ekb/segmenter/street_hazards/gp_eval" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file From 289caf02d95e27853a633d406461ec97a68c0820 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Thu, 3 Nov 2022 02:28:35 -0400 Subject: [PATCH 125/150] add code to train het model on street hazatds dataset --- .../configs/street_hazards/het.py | 276 ++++++++++++++++++ .../robust_segvit/run_street_hazards_het.yaml | 35 +++ 2 files changed, 311 insertions(+) create mode 100644 
experimental/robust_segvit/configs/street_hazards/het.py create mode 100755 experimental/robust_segvit/run_street_hazards_het.yaml diff --git a/experimental/robust_segvit/configs/street_hazards/het.py b/experimental/robust_segvit/configs/street_hazards/het.py new file mode 100644 index 000000000..4471c6482 --- /dev/null +++ b/experimental/robust_segvit/configs/street_hazards/het.py @@ -0,0 +1,276 @@ +# coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Train segmenter model on ade20k_ind. + +Compare performance from deterministic upstream checkpoints. + +""" +# pylint: enable=line-too-long + +import ml_collections +import os +import datetime + +_CITYSCAPES_FINE_TRAIN_SIZE = 2975 +_CITYSCAPES_COARSE_TRAIN_SIZE = 19998 + +_ADE20K_TRAIN_SIZE = 20210 +_PASCAL_VOC_TRAIN_SIZE = 10582 +_PASCAL_CONTEXT_TRAIN_SIZE = 4998 +_STREET_HAZARDS_TRAIN_SIZE = 5125 + +TRAIN_SIZES = { + 'cityscapes': _CITYSCAPES_FINE_TRAIN_SIZE, + 'ade20k': _ADE20K_TRAIN_SIZE, + 'ade20k_ind': _ADE20K_TRAIN_SIZE, + 'pascal_voc': _PASCAL_VOC_TRAIN_SIZE, + 'pascal_context': _PASCAL_CONTEXT_TRAIN_SIZE, + 'street_hazards': _STREET_HAZARDS_TRAIN_SIZE, +} + +# Model specs. 
+LOAD_PRETRAINED_BACKBONE = True +BACKBONE_ORIGIN = 'vision_transformer' +VIT_SIZE = 'L' +STRIDE = 16 +RESNET_SIZE = None +CLASSIFIER = 'token' +target_size = (720, 720) +UPSTREAM_TASK = 'augreg+i21k+imagenet2012' + + +# Upstream +MODEL_PATHS = { + + # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 + ('vision_transformer', 'L', 16, None, 'token', 'i21k+imagenet2012'): + 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', + ('vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', +} + + +MODEL_PATH = MODEL_PATHS[(BACKBONE_ORIGIN, VIT_SIZE, STRIDE, RESNET_SIZE, + CLASSIFIER, UPSTREAM_TASK)] + +if VIT_SIZE == 'B': + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 +elif VIT_SIZE == 'L': + mlp_dim = 4096 + num_heads = 16 + num_layers = 24 + hidden_size = 1024 + +TRAIN_SAMPLES = 32 + + +def get_config(runlocal=''): + """Returns the configuration for street hazards segmentation.""" + + runlocal = bool(runlocal) + + config = ml_collections.ConfigDict() + config.experiment_name = 'street_hazards_ind_segmenter_het_hyper' + + # Dataset. + config.dataset_name = 'robust_segvit_segmentation' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 + + config.dataset_configs.train_split = 'train' + config.dataset_configs.name = 'street_hazards' + config.dataset_configs.dataset_name = '' # ood name flag to write in eval. + + # Model. 
+ config.model_name = 'segvit' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit' + config.model.backbone.mlp_dim = mlp_dim + config.model.backbone.num_heads = num_heads + config.model.backbone.num_layers = num_layers + config.model.backbone.hidden_size = hidden_size + config.model.backbone.dropout_rate = 0.1 + config.model.backbone.attention_dropout_rate = 0.0 + config.model.backbone.classifier = CLASSIFIER + + # Decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'het' + + # Het layer params + # temp: wide sweep [0.15, 0.3, 0.5, 0.75, 1.0, 1.5, 2.0] + config.model.decoder.temperature = 1.0 + # efficient low rank approx ~ FxK where K is the classes. False for K<20. + config.model.decoder.param_efficient = False + # F as a low rank approx of KxK matrix has num_factors: + # imagenet~15, jft~50, cifar~6, cityscapes~sweep(5-10). + config.model.decoder.num_factors = 5 + # mc_samples: use as much as can be afforded, ideally > 10. + config.model.decoder.mc_samples = 1000 + config.model.decoder.return_locs = False + # turn on to run an approx on KHW x KHW instead of KxK. + config.model.decoder.share_samples_across_batch = False + + # Training. + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = ml_collections.FieldReference(100) + config.batch_size = 32 + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # Learning rate. 
+ config.num_train_examples = TRAIN_SIZES.get(config.dataset_configs.name) + config.steps_per_epoch = config.get_ref( + 'num_train_examples') // config.get_ref('batch_size') + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') + config.lr_configs.steps_per_cycle = config.get_ref( + 'num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 3e-5 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = ml_collections.ConfigDict() + config.pretrained_backbone_configs.checkpoint_format = BACKBONE_ORIGIN + config.pretrained_backbone_configs.checkpoint_path = MODEL_PATH + config.pretrained_backbone_configs.token_init = True + config.pretrained_backbone_configs.classifier = 'token' + config.pretrained_backbone_configs.backbone_type = 'vit' + + # Logging. + config.write_summary = True + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof. + config.checkpoint = True # Do checkpointing. + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = False # Debug mode during training. + config.debug_eval = False # Debug mode during eval. + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # Evaluation. 
+ config.eval_configs = ml_collections.ConfigDict() + config.eval_configs.mode = 'standard' + config.eval_mode = False + config.eval_covariate_shift = False + config.eval_label_shift = False + config.model.input_shape = target_size + + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. + config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. + + if runlocal: + config.count_flops = False + config.dataset_configs.train_target_size = (128, 128) + config.model.input_shape = config.dataset_configs.train_target_size + config.batch_size = 8 + config.num_training_epochs = 5 + config.warmup_steps = 0 + config.dataset_configs.train_split = f'train[:{TRAIN_SAMPLES}]' + config.dataset_configs.validation_split = f'validation[:{TRAIN_SAMPLES}]' + config.num_train_examples = TRAIN_SAMPLES + + return config + + +def checkpoint(hyper, backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task): + """Defines checkpoints for sweep.""" + overwrites = [] + if resnet_size is not None: + raise NotImplementedError('') + else: + overwrites.append( + hyper.sweep('config.model.patches', [{ + 'size': (stride, stride) + }])) + + if vit_size == 'B': + overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [3072])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [12])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [12])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [768])) + elif vit_size == 'L': + 
overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [4096])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [16])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [24])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [1024])) + else: + raise NotImplementedError('') + + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_format', + [backbone_origin])) + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_path', [ + MODEL_PATHS[(backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task)] + ])) + + return hyper.product(overwrites) + + +def get_sweep(hyper): + """Defines the hyper-parameters sweeps for doing grid search.""" + parameters = [ + hyper.sweep('config.model.decoder.num_factors', + hyper.discrete([5, 10, 20, 50])), + hyper.sweep('config.model.decoder.temperature', + [0.15, 0.3, 0.5, 0.75, 1.0, 1.5, 2.0]), + hyper.sweep('config.model.decoder.share_samples_across_batch', + [True, False]), + hyper.sweep('config.model.decoder.param_efficient', + [True, False]), + ] + + return hyper.product(parameters) diff --git a/experimental/robust_segvit/run_street_hazards_het.yaml b/experimental/robust_segvit/run_street_hazards_het.yaml new file mode 100755 index 000000000..af4fd2799 --- /dev/null +++ b/experimental/robust_segvit/run_street_hazards_het.yaml @@ -0,0 +1,35 @@ +name: het_street_hazards_hparam +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 24 + config.model.decoder.temperature: + values: [0.15, 0.3, 1, 1.5, 2.0] + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/street_hazards/het.py" + - "--output_dir" + - 
"gs://ub-ekb/segmenter/street_hazards/het" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file From 0545d59d34563e6b83ca15f45820a1dd1b56cb7b Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Thu, 3 Nov 2022 02:38:11 -0400 Subject: [PATCH 126/150] fix default lr for het model given det model results --- experimental/robust_segvit/configs/street_hazards/het.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/robust_segvit/configs/street_hazards/het.py b/experimental/robust_segvit/configs/street_hazards/het.py index 4471c6482..0a801eaf3 100644 --- a/experimental/robust_segvit/configs/street_hazards/het.py +++ b/experimental/robust_segvit/configs/street_hazards/het.py @@ -160,7 +160,7 @@ def get_config(runlocal=''): config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') config.lr_configs.steps_per_cycle = config.get_ref( 'num_training_epochs') * config.get_ref('steps_per_epoch') - config.lr_configs.base_learning_rate = 3e-5 + config.lr_configs.base_learning_rate = 1e-4 # model and data dtype config.model_dtype_str = 'float32' From ce513b96f31e312d743d77e692742528ee4420c5 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Thu, 3 Nov 2022 02:43:59 -0400 Subject: [PATCH 127/150] add config file to train be model on street hazards --- .../configs/street_hazards/be.py | 265 ++++++++++++++++++ .../robust_segvit/run_street_hazards_be.yaml | 37 +++ 2 files changed, 302 insertions(+) create mode 100644 experimental/robust_segvit/configs/street_hazards/be.py create mode 100755 experimental/robust_segvit/run_street_hazards_be.yaml diff --git a/experimental/robust_segvit/configs/street_hazards/be.py b/experimental/robust_segvit/configs/street_hazards/be.py new file mode 100644 index 000000000..7554ae85d --- /dev/null +++ b/experimental/robust_segvit/configs/street_hazards/be.py @@ -0,0 +1,265 @@ +# coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Train segmenter model on street_hazards. + +Compare performance from deterministic upstream checkpoints. + +""" +# pylint: enable=line-too-long + +import ml_collections +import os +import datetime + +_CITYSCAPES_FINE_TRAIN_SIZE = 2975 +_CITYSCAPES_COARSE_TRAIN_SIZE = 19998 + +_ADE20K_TRAIN_SIZE = 20210 +_PASCAL_VOC_TRAIN_SIZE = 10582 +_PASCAL_CONTEXT_TRAIN_SIZE = 4998 +_STREET_HAZARDS_TRAIN_SIZE = 5125 + +TRAIN_SIZES = { + 'cityscapes': _CITYSCAPES_FINE_TRAIN_SIZE, + 'ade20k': _ADE20K_TRAIN_SIZE, + 'ade20k_ind': _ADE20K_TRAIN_SIZE, + 'pascal_voc': _PASCAL_VOC_TRAIN_SIZE, + 'pascal_context': _PASCAL_CONTEXT_TRAIN_SIZE, + 'street_hazards': _STREET_HAZARDS_TRAIN_SIZE, + +} + +# Model specs. 
+LOAD_PRETRAINED_BACKBONE = True +BACKBONE_ORIGIN = 'vision_transformer' +VIT_SIZE = 'L' +STRIDE = 16 +RESNET_SIZE = None +CLASSIFIER = 'token' +target_size = (720, 720) +UPSTREAM_TASK = 'augreg+i21k+imagenet2012' + + +# Upstream +MODEL_PATHS = { + + # Imagenet 21k + finetune in imagenet2012 with perf 0.85 adap_res 384 + ('vision_transformer', 'L', 16, None, 'token', 'i21k+imagenet2012'): + 'gs://vit_models/imagenet21k+imagenet2012/ViT-L_16.npz', + ('vision_transformer', 'L', 16, None, 'token', 'augreg+i21k+imagenet2012'): + 'gs://vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz', # pylint: disable=g-long-lambda + +} + + +MODEL_PATH = MODEL_PATHS[(BACKBONE_ORIGIN, VIT_SIZE, STRIDE, RESNET_SIZE, + CLASSIFIER, UPSTREAM_TASK)] + +if VIT_SIZE == 'B': + mlp_dim = 3072 + num_heads = 12 + num_layers = 12 + hidden_size = 768 +elif VIT_SIZE == 'L': + mlp_dim = 4096 + num_heads = 16 + num_layers = 24 + hidden_size = 1024 + +TRAIN_SAMPLES = 32 + + +def get_config(runlocal=''): + """Returns the configuration for street hazards segmentation.""" + + runlocal = bool(runlocal) + + config = ml_collections.ConfigDict() + config.experiment_name = 'street_hazards_segmenter_be' + + # Dataset. + config.dataset_name = 'robust_segvit_segmentation' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 + + config.dataset_configs.train_split = 'train' + config.dataset_configs.name = 'street_hazards' + config.dataset_configs.dataset_name = '' # ood name flag to write in eval. + + # Model. 
+ config.model_name = 'segvit' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit_be' + config.model.backbone.mlp_dim = mlp_dim + config.model.backbone.num_heads = num_heads + config.model.backbone.num_layers = num_layers + config.model.backbone.hidden_size = hidden_size + config.model.backbone.dropout_rate = 0.1 + config.model.backbone.attention_dropout_rate = 0.0 + config.model.backbone.classifier = CLASSIFIER + + # Decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'linear_be' + + # BE variables + config.model.backbone.ens_size = 3 + config.model.backbone.random_sign_init = -0.5 + config.model.backbone.be_layers = (22, 23) + config.fast_weight_lr_multiplier = 1.0 + + # Training. + config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = ml_collections.FieldReference(100) + config.batch_size = 32 + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # Learning rate. + config.num_train_examples = TRAIN_SIZES.get(config.dataset_configs.name) + config.steps_per_epoch = config.get_ref( + 'num_train_examples') // config.get_ref('batch_size') + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. 
+ config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 1 * config.get_ref('steps_per_epoch') + config.lr_configs.steps_per_cycle = config.get_ref( + 'num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # load pretrained backbone + config.load_pretrained_backbone = LOAD_PRETRAINED_BACKBONE + config.pretrained_backbone_configs = ml_collections.ConfigDict() + config.pretrained_backbone_configs.checkpoint_format = BACKBONE_ORIGIN + config.pretrained_backbone_configs.checkpoint_path = MODEL_PATH + config.pretrained_backbone_configs.token_init = True + config.pretrained_backbone_configs.classifier = 'token' + config.pretrained_backbone_configs.backbone_type = 'vit' + + # Logging. + config.write_summary = True + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof. + config.checkpoint = True # Do checkpointing. + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = False # Debug mode during training. + config.debug_eval = False # Debug mode during eval. + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # Evaluation. + config.eval_configs = ml_collections.ConfigDict() + config.eval_configs.mode = 'standard' + config.eval_mode = False + config.eval_covariate_shift = True + config.eval_label_shift = True + config.model.input_shape = target_size + + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. 
+ config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. + + if runlocal: + config.count_flops = False + config.dataset_configs.train_target_size = (128, 128) + config.model.input_shape = config.dataset_configs.train_target_size + config.batch_size = 8 + config.num_training_epochs = 5 + config.warmup_steps = 0 + config.dataset_configs.train_split = f'train[:{TRAIN_SAMPLES}]' + config.dataset_configs.validation_split = f'validation[:{TRAIN_SAMPLES}]' + config.num_train_examples = TRAIN_SAMPLES + + return config + + +def checkpoint(hyper, backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task): + """Defines checkpoints for sweep.""" + overwrites = [] + if resnet_size is not None: + raise NotImplementedError('') + else: + overwrites.append( + hyper.sweep('config.model.patches', [{ + 'size': (stride, stride) + }])) + + if vit_size == 'B': + overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [3072])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [12])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [12])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [768])) + elif vit_size == 'L': + overwrites.append(hyper.sweep('config.model.backbone.mlp_dim', [4096])) + overwrites.append(hyper.sweep('config.model.backbone.num_heads', [16])) + overwrites.append(hyper.sweep('config.model.backbone.num_layers', [24])) + overwrites.append(hyper.sweep('config.model.backbone.hidden_size', [1024])) + else: + raise NotImplementedError('') + + overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_format', + [backbone_origin])) + 
overwrites.append( + hyper.sweep('config.pretrained_backbone_configs.checkpoint_path', [ + MODEL_PATHS[(backbone_origin, vit_size, stride, resnet_size, + classifier, upstream_task)] + ])) + + return hyper.product(overwrites) + + +def get_sweep(hyper): + """Defines the hyper-parameters sweeps for grid search.""" + + random_sign_init = hyper.sweep('config.model.backbone.random_sign_init', + [-0.5, 0.5]) + fast_weight_lr_multiplier = hyper.sweep('config.fast_weight_lr_multiplier', + [0.5, 1.0, 2.0]) + + return hyper.product([random_sign_init, fast_weight_lr_multiplier]) diff --git a/experimental/robust_segvit/run_street_hazards_be.yaml b/experimental/robust_segvit/run_street_hazards_be.yaml new file mode 100755 index 000000000..98742c2c9 --- /dev/null +++ b/experimental/robust_segvit/run_street_hazards_be.yaml @@ -0,0 +1,37 @@ +name: be_street_hazards_hparam +program: deterministic.py +method: grid +project: rdl-debug +entity: ekellbuch + +metric: + name: valid_loss + goal: minimize +parameters: + config.use_wandb: + value: true + config.wandb_project : + value: ${{project}} + config.wandb_entity : + value: ${{entity}} + config.batch_size: + value: 24 + config.model.backbone.random_sign_init: + values: [-0.5, -0.25, 0.25, 0.5] + config.fast_weight_lr_multiplier: + values: [0.5, 1.0, 2.0] + + +command: + - ${env} + - python + - ${program} + - "--config" + - "configs/street_hazards/be.py" + - "--output_dir" + - "gs://ub-ekb/segmenter/street_hazards/be" + - "--num_cores" + - "8" + - "--tpu" + - "local" + - ${args} \ No newline at end of file From 7b16a725d271fc44cbc6fb587e96be008bf1b742 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Fri, 4 Nov 2022 14:41:19 -0400 Subject: [PATCH 128/150] add calibration AUCROC metric --- .../custom_segmentation_trainer.py | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index 
9f44e1e94..c3577d342 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -167,8 +167,10 @@ def evaluate(train_state: train_utils.TrainState, # Evaluate global metrics on one of the hosts (lead_host), but given # intermediate values collected from all hosts. - # start ece metric - ece_metric = rm.metrics.ExpectedCalibrationError(num_bins=10)._metric + # setup calibration evaluation + ece_num_bins = config.get('ece_num_bins', 15) + ece_metric = rm.metrics.ExpectedCalibrationError(num_bins=ece_num_bins)._metric + calib_auc = rm.metrics.CalibrationAUC(correct_pred_as_pos_label=False)._metric # store logits store_logits = config.eval_configs.get('store_logits', False) @@ -191,10 +193,11 @@ def evaluate(train_state: train_utils.TrainState, eval_metrics.append(train_utils.unreplicate_and_get(e_metrics)) probs = jax.nn.softmax(e_logits, axis=-1) - - # TODO(kellybuchanan): add masking to ece metric in rm. # updates on each host separately - ece_metric.update_state(e_batch['label'], probs, sample_weight=e_batch['batch_mask']) + ece_metric.update_state(labels=e_batch['label'], probabilities=probs, sample_weight=e_batch['batch_mask']) + y_pred = jnp.argmax(probs, axis=-1) # predicted label indices + confidence = jnp.max(probs, axis=-1) # confidence score for predicted labels + calib_auc.update_state(y_true=e_batch['label'], y_pred=y_pred, confidence=confidence, sample_weight=e_batch['batch_mask']) if lead_host and global_metrics_fn is not None: # Collect data to be sent for computing global metrics. 
@@ -231,9 +234,12 @@ def evaluate(train_state: train_utils.TrainState, prefix=prefix, ) - # Gather ece from all hosts and write value: + # Gather uncertainty metrics from all hosts and write value: ece_metric = host_all_gather_metrics(ece_metric) - writer.write_scalars(step=step, scalars={'{}_ece'.format(prefix) : ece_metric.result()} ) + calib_auc = host_all_gather_metrics(calib_auc) + writer.write_scalars(step=step, scalars={'{}_ece'.format(prefix) : ece_metric.result(), + '{}_calib_auc'.format(prefix): calib_auc.result(), + } ) # Visualize val predictions for one batch: if lead_host: From df0b06d5e84ce14e9760fb754cf8eb477c651ac9 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 7 Nov 2022 17:31:44 -0500 Subject: [PATCH 129/150] use softmax call --- experimental/robust_segvit/uncertainty_metrics.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/experimental/robust_segvit/uncertainty_metrics.py b/experimental/robust_segvit/uncertainty_metrics.py index 7f0d3c66d..e22934c9f 100644 --- a/experimental/robust_segvit/uncertainty_metrics.py +++ b/experimental/robust_segvit/uncertainty_metrics.py @@ -15,6 +15,7 @@ """Calculate uncertainty metrics for segmentation tasks.""" from typing import Optional, Tuple +import jax from jax import lax import jax.numpy as jnp from scenic.model_lib.layers import nn_ops @@ -137,7 +138,7 @@ def get_uncertainty_confusion_matrix( # Calculate uncertainty map: if uncertainty_measure == 'softmax': - uncertainty_map = jnp.max(jnp.exp(logits) / jnp.sum(jnp.exp(logits), -1, keepdims=True), -1) + uncertainty_map = jnp.max(jax.nn.softmax(logits, -1), -1) elif uncertainty_measure == 'entropy': uncertainty_map = get_entropy_from_logits(logits) else: From e393f3cac1975c69ff5af2ac3e886d2903cedfcc Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Mon, 7 Nov 2022 17:36:01 -0500 Subject: [PATCH 130/150] remove unnecessary call to del params --- experimental/robust_segvit/pretrainer_utils.py | 2 -- 1 file changed, 2 
deletions(-) diff --git a/experimental/robust_segvit/pretrainer_utils.py b/experimental/robust_segvit/pretrainer_utils.py index 2e6217349..a3b1d4d76 100644 --- a/experimental/robust_segvit/pretrainer_utils.py +++ b/experimental/robust_segvit/pretrainer_utils.py @@ -198,8 +198,6 @@ def convert_torch_to_jax_checkpoint( optimizer={"target": restored_params},) # pytype: enable=wrong-arg-types - # free memory - del restored_params return restored_train_state From 9bcacca76b6527e8c0d8040fa012ee2ac8f62684 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 8 Nov 2022 15:39:45 -0500 Subject: [PATCH 131/150] add script code to call eval in all models for all datasets --- experimental/robust_segvit/run_eval.sh | 42 ++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 experimental/robust_segvit/run_eval.sh diff --git a/experimental/robust_segvit/run_eval.sh b/experimental/robust_segvit/run_eval.sh new file mode 100644 index 000000000..f2f120687 --- /dev/null +++ b/experimental/robust_segvit/run_eval.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# call eval model using wandb + +# Debug on Mac OS X platform +use_gpu=False +if [ "$(uname)" = "Darwin" ] ; then +tpu=False +num_cores=1 +batch_size=5 +elif [ "$(uname)" = "Linux" ]; then +tpu='local' +num_cores=8 +batch_size=8 +fi + +# default config for eval +eval_covariate_shift=False +method_name='msp' +use_wandb=True + +for dataset in "ade20k_ind" "street_hazards" "cityscapes" +do +for model in "gp" "be" "deterministic" "het" +do +base_output_dir="gs://ub-ekb/segmenter/${dataset}" +config_file="configs/${dataset}/${model}_eval.py" +run_name="${model}_eval" +output_dir="${base_output_dir}/${run_name}" +python deterministic.py \ +--output_dir=${output_dir} \ +--num_cores=$num_cores \ +--use_gpu=$use_gpu \ +--config=${config_file} \ +--config.batch_size=${batch_size} \ +--config.eval_robustness_configs.method_name=${method_name} \ +--config.eval_covariate_shift=${eval_covariate_shift} \ 
+--config.use_wandb=${use_wandb} \ +--tpu=${tpu} \ + +done +done From dc9c4c015d35d0af022a1e2905acdc0a691d640b Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 8 Nov 2022 18:48:28 -0500 Subject: [PATCH 132/150] update the prefix for the ood metrics to easily plot these --- .../custom_segmentation_trainer.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index c3577d342..054f70084 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -1287,7 +1287,7 @@ def evaluate_cityscapes_c( ood_config.update({'dataset_name': 'cityscapes_variants'}) accuracy_per_corruption = {} - prefix = 'citycvalid' + prefix = 'cityc' for corruption in cityscapes_variants.CITYSCAPES_C_CORRUPTIONS: local_list = [] # list to compute macro average per corruption for severity in cityscapes_variants.CITYSCAPES_C_SEVERITIES: @@ -1298,7 +1298,7 @@ def evaluate_cityscapes_c( rng, data_rng = jax.random.split(rng) dataset = train_utils.get_dataset(ood_config, data_rng) dataset.meta_data['dataset_name'] = 'cityscapes_c' - dataset.meta_data['prefix'] = prefix + f'_{corruption}_{severity}' + dataset.meta_data['prefix'] = prefix + f'/{corruption}_{severity}/valid' eval_summary = evaluate( train_state=train_state, @@ -1325,7 +1325,7 @@ def evaluate_cityscapes_c( # append name to metrics key_separator = '_' avg_cityscapes_c_metrics = { - key_separator.join((prefix, key)): val + key_separator.join((prefix + '/valid', key)): val for key, val in cityscapes_c_metrics.items() } # update metrics @@ -1371,7 +1371,7 @@ def evaluate_fishyscapes( device_count = jax.device_count() accuracy_per_corruption = {} - prefix = 'fishyvalid' + prefix = 'fishyscapes' for corruption in cityscapes_variants.FISHYSCAPES_CORRUPTIONS: with ood_config.unlocked(): @@ -1381,7 +1381,7 @@ def 
evaluate_fishyscapes( data_rng, rng = jax.random.split(rng) dataset = train_utils.get_dataset(ood_config, data_rng) dataset.meta_data['dataset_name'] = 'fishyscapes' - dataset.meta_data['prefix'] = prefix + f'_{corruption}' + dataset.meta_data['prefix'] = prefix + f'/{corruption}/valid' eval_summary = evaluate_ood( train_state=train_state, @@ -1404,7 +1404,7 @@ def evaluate_fishyscapes( # append name to metrics key_separator = '_' avg_fishyscapes_metrics = { - key_separator.join((prefix, key)): val + key_separator.join((prefix +'/valid', key)): val for key, val in fishyscapes_metrics.items() } # update metrics @@ -1449,7 +1449,7 @@ def evaluate_ade20k_ood_open( ood_config.update({'dataset_name': 'robust_segvit_segmentation'}) device_count = jax.device_count() - prefix = 'ade20k_ood_open' + prefix = 'ade20k_ood_open/valid' with ood_config.unlocked(): ood_config.dataset_configs.name = 'ade20k_ood_open' @@ -1472,7 +1472,7 @@ def evaluate_ade20k_ood_open( **config.get('eval_robustness_configs', {}), ) - # append name to metrics + # append name to metrics: key_separator = '_' avg_open_set_metrics = { key_separator.join((prefix, key)): val @@ -1536,7 +1536,7 @@ def evaluate_ade20k_corrupted( data_rng, rng = jax.random.split(rng) dataset = train_utils.get_dataset(ood_config, data_rng) - dataset.meta_data['prefix'] = prefix + f'_{corruption}_{severity}' + dataset.meta_data['prefix'] = prefix + f'/{corruption}_{severity}/valid' eval_summary = evaluate( train_state=train_state, @@ -1563,7 +1563,7 @@ def evaluate_ade20k_corrupted( # append name to metrics key_separator = '_' avg_corrupted_metrics = { - key_separator.join((prefix, key)): val + key_separator.join((prefix + '/valid', key)): val for key, val in ade20k_c_metrics.items() } # update metrics @@ -1608,7 +1608,7 @@ def evaluate_street_hazards_ood_open( ood_config.update({'dataset_name': 'robust_segvit_segmentation'}) device_count = jax.device_count() - prefix = 'street_hazards_open' + prefix = 
'street_hazards_open/valid' with ood_config.unlocked(): ood_config.dataset_configs.name = 'street_hazards_open' From b665aa07658f0616ff4bf5f6b8f4f57ae2bddd5d Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 8 Nov 2022 19:09:28 -0500 Subject: [PATCH 133/150] update prefix to group metrics according to corruption/level instead of corruption_level --- experimental/robust_segvit/custom_segmentation_trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index 054f70084..ab03fd44c 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -1298,7 +1298,7 @@ def evaluate_cityscapes_c( rng, data_rng = jax.random.split(rng) dataset = train_utils.get_dataset(ood_config, data_rng) dataset.meta_data['dataset_name'] = 'cityscapes_c' - dataset.meta_data['prefix'] = prefix + f'/{corruption}_{severity}/valid' + dataset.meta_data['prefix'] = prefix + f'/{corruption}/{severity}/valid' eval_summary = evaluate( train_state=train_state, @@ -1536,7 +1536,7 @@ def evaluate_ade20k_corrupted( data_rng, rng = jax.random.split(rng) dataset = train_utils.get_dataset(ood_config, data_rng) - dataset.meta_data['prefix'] = prefix + f'/{corruption}_{severity}/valid' + dataset.meta_data['prefix'] = prefix + f'/{corruption}/{severity}/valid' eval_summary = evaluate( train_state=train_state, From 3c73e26b5ed1e48a479b7a6aa2463b69d1c50a5c Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 8 Nov 2022 19:21:24 -0500 Subject: [PATCH 134/150] update trainer to call the same data loader for any dataset + include eval toy config file --- .../configs/cityscapes/toy_model_eval.py | 166 ++++++++++++++++++ .../custom_segmentation_trainer.py | 8 +- experimental/robust_segvit/run_toy_eval.sh | 36 ++++ 3 files changed, 206 insertions(+), 4 deletions(-) create mode 100644 
experimental/robust_segvit/configs/cityscapes/toy_model_eval.py create mode 100755 experimental/robust_segvit/run_toy_eval.sh diff --git a/experimental/robust_segvit/configs/cityscapes/toy_model_eval.py b/experimental/robust_segvit/configs/cityscapes/toy_model_eval.py new file mode 100644 index 000000000..3dd70fcff --- /dev/null +++ b/experimental/robust_segvit/configs/cityscapes/toy_model_eval.py @@ -0,0 +1,166 @@ +# coding=utf-8 +# Copyright 2022 The Uncertainty Baselines Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +r"""Eval toy segmenter model on cityscapes. + +""" +# pylint: enable=line-too-long + +import ml_collections +import os +import datetime + +batch_size = 8 +_CITYSCAPES_TRAIN_SIZE_SPLIT = 16 + +# Model spec. 
+STRIDE = 4 +mlp_dim = 2 +num_heads = 1 +num_layers = 1 +hidden_size = 1 +target_size = (128, 128) + +# Upstream +CHECKPOINT_ORIGIN = 'ub' +VIT_SIZE = 'debug' +RESNET_SIZE = None +CLASSIFIER = 'token' +EXPERIMENTID = 'city_toy' + +CHECKPOINT_PATHS = { + ('ub', 'debug', 4, None, 'token', 'city_toy'): + 'ub-ekb/segmenter/cityscapes/toy_model/toy_model', +} + + +CHECKPOINT_PATH = CHECKPOINT_PATHS[(CHECKPOINT_ORIGIN, VIT_SIZE, STRIDE, + RESNET_SIZE, CLASSIFIER, EXPERIMENTID)] + +def get_config(runlocal=''): + """Returns the configuration for Cityscapes segmentation.""" + + runlocal = bool(runlocal) + + config = ml_collections.ConfigDict() + config.experiment_name = 'cityscapes_segmenter_toy_model' + + # Dataset. + config.dataset_name = 'robust_segvit_segmentation' + config.dataset_configs = ml_collections.ConfigDict() + config.dataset_configs.target_size = target_size + config.dataset_configs.train_split = 'train[:16]' + config.dataset_configs.validation_split = 'validation[:16]' + config.dataset_configs.name = 'cityscapes' # name of dataset to evaluate + + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 + + # Model. + config.model_name = 'segvit' + config.model = ml_collections.ConfigDict() + + config.model.patches = ml_collections.ConfigDict() + config.model.patches.size = (STRIDE, STRIDE) + + config.model.backbone = ml_collections.ConfigDict() + config.model.backbone.type = 'vit' + config.model.backbone.mlp_dim = mlp_dim + config.model.backbone.num_heads = num_heads + config.model.backbone.num_layers = num_layers + config.model.backbone.hidden_size = hidden_size + config.model.backbone.dropout_rate = 0.1 + config.model.backbone.attention_dropout_rate = 0.0 + config.model.backbone.classifier = 'gap' + + # Decoder + config.model.decoder = ml_collections.ConfigDict() + config.model.decoder.type = 'linear' + + # Training. 
+ config.trainer_name = 'segvit_trainer' + config.optimizer = 'adam' + config.optimizer_configs = ml_collections.ConfigDict() + config.l2_decay_factor = 0.0 + config.max_grad_norm = 1.0 + config.label_smoothing = None + config.num_training_epochs = ml_collections.FieldReference(2) + config.batch_size = batch_size + config.rng_seed = 0 + config.focal_loss_gamma = 0.0 + + # Learning rate. + config.steps_per_epoch = _CITYSCAPES_TRAIN_SIZE_SPLIT // config.get_ref( + 'batch_size') + # setting 'steps_per_cycle' to total_steps basically means non-cycling cosine. + config.lr_configs = ml_collections.ConfigDict() + config.lr_configs.learning_rate_schedule = 'compound' + config.lr_configs.factors = 'constant * cosine_decay * linear_warmup' + config.lr_configs.warmup_steps = 0 + config.lr_configs.steps_per_cycle = config.get_ref( + 'num_training_epochs') * config.get_ref('steps_per_epoch') + config.lr_configs.base_learning_rate = 1e-4 + + # model and data dtype + config.model_dtype_str = 'float32' + config.data_dtype_str = 'float32' + + # init not included + + # Logging. + config.write_summary = True + config.write_xm_measurements = True # write XM measurements + config.xprof = False # Profile using xprof. + config.checkpoint = True # Do checkpointing. + config.checkpoint_steps = 5 * config.get_ref('steps_per_epoch') + + config.debug_train = False # Debug mode during training. + config.debug_eval = False # Debug mode during eval. + config.log_eval_steps = 1 * config.get_ref('steps_per_epoch') + + # Evaluation. + config.eval_mode = True + config.eval_configs = ml_collections.ConfigDict() + config.eval_configs.mode = 'standard' + config.eval_covariate_shift = True + config.eval_label_shift = True + config.eval_configs.store_logits = False + + config.eval_robustness_configs = ml_collections.ConfigDict() + config.eval_robustness_configs.auc_online = True + config.eval_robustness_configs.method_name = 'mlogit' + + # wandb.ai configurations. 
+ config.use_wandb = False + config.wandb_dir = 'wandb' + config.wandb_project = 'rdl-debug' + config.wandb_entity = 'ekellbuch' + config.wandb_exp_name = None # Give experiment a name. + config.wandb_exp_name = ( + os.path.splitext(os.path.basename(__file__))[0] + '_' + + datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')) + config.wandb_exp_group = None # Give experiment a group name. + + if runlocal: + config.count_flops = False + + return config + + +def get_sweep(hyper): + return hyper.product([]) diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index ab03fd44c..30946a9e6 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -1284,7 +1284,7 @@ def evaluate_cityscapes_c( # update config: ood_config = ml_collections.ConfigDict() ood_config.update(**config) - ood_config.update({'dataset_name': 'cityscapes_variants'}) + ood_config.update({'dataset_name': 'robust_segvit_variants'}) accuracy_per_corruption = {} prefix = 'cityc' @@ -1293,7 +1293,7 @@ def evaluate_cityscapes_c( for severity in cityscapes_variants.CITYSCAPES_C_SEVERITIES: with ood_config.unlocked(): - ood_config.dataset_configs.dataset_name = f'cityscapes_corrupted/semantic_segmentation_{corruption}_{severity}' + ood_config.dataset_configs.name = f'cityscapes_c_{corruption}_{severity}' rng, data_rng = jax.random.split(rng) dataset = train_utils.get_dataset(ood_config, data_rng) @@ -1367,7 +1367,7 @@ def evaluate_fishyscapes( # update config: ood_config = ml_collections.ConfigDict() ood_config.update(**config) - ood_config.update({'dataset_name': 'cityscapes_variants'}) + ood_config.update({'dataset_name': 'robust_segvit_variants'}) device_count = jax.device_count() accuracy_per_corruption = {} @@ -1375,7 +1375,7 @@ def evaluate_fishyscapes( for corruption in cityscapes_variants.FISHYSCAPES_CORRUPTIONS: with ood_config.unlocked(): - 
ood_config.dataset_configs.dataset_name = f'fishyscapes/{corruption}' + ood_config.dataset_configs.name = f'fishyscapes/{corruption}' ood_config.batch_size = device_count data_rng, rng = jax.random.split(rng) diff --git a/experimental/robust_segvit/run_toy_eval.sh b/experimental/robust_segvit/run_toy_eval.sh new file mode 100755 index 000000000..67426937b --- /dev/null +++ b/experimental/robust_segvit/run_toy_eval.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# train toy model using wandb +#wandb sweep run_toy_mac.yaml +# before make sure we can run code vanilla version: + +DATASET='ade20k_ind' # or cityscapes +DATASET='street_hazards' +DATASET='cityscapes' + +base_output_dir="gs://ub-ekb/segmenter/${DATASET}/toy_model" + +# Debug on Mac OS X platform +use_gpu=False +if [ "$(uname)" = "Darwin" ] ; then +tpu=False +num_cores=1 +batch_size=5 +elif [ "$(uname)" = "Linux" ]; then +tpu='local' +num_cores=8 +batch_size=8 +fi + +use_wandb=True +config_file="configs/${DATASET}/toy_model_eval.py:runlocal" +run_name="toy_model_eval" +output_dir="${base_output_dir}/${run_name}" +python deterministic.py \ +--output_dir=${output_dir} \ +--num_cores=$num_cores \ +--use_gpu=$use_gpu \ +--config=${config_file} \ +--config.batch_size=${batch_size} \ +--config.use_wandb=${use_wandb} \ +--tpu=${tpu} \ From 4be534778c28540b8bce476705484de830dfa104 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Tue, 8 Nov 2022 19:59:05 -0500 Subject: [PATCH 135/150] update cityscapes config to use the same data loader as ade20k and streethazards --- experimental/robust_segvit/configs/cityscapes/be_eval.py | 8 ++++++-- .../robust_segvit/configs/cityscapes/deterministic.py | 8 ++++++-- .../configs/cityscapes/deterministic_eval.py | 8 ++++++-- .../configs/cityscapes/deterministic_seeds.py | 8 ++++++-- experimental/robust_segvit/configs/cityscapes/gp.py | 8 ++++++-- experimental/robust_segvit/configs/cityscapes/gp_eval.py | 8 ++++++-- experimental/robust_segvit/configs/cityscapes/gp_seeds.py | 8 ++++++-- 
experimental/robust_segvit/configs/cityscapes/het.py | 8 ++++++-- experimental/robust_segvit/configs/cityscapes/het_eval.py | 8 ++++++-- .../robust_segvit/configs/cityscapes/torch_eval.py | 8 ++++++-- .../robust_segvit/configs/cityscapes/toy_model.py | 8 ++++++-- .../robust_segvit/configs/cityscapes/toy_model_eval.py | 1 - 12 files changed, 66 insertions(+), 23 deletions(-) diff --git a/experimental/robust_segvit/configs/cityscapes/be_eval.py b/experimental/robust_segvit/configs/cityscapes/be_eval.py index 912b4e47f..72b6b63e5 100644 --- a/experimental/robust_segvit/configs/cityscapes/be_eval.py +++ b/experimental/robust_segvit/configs/cityscapes/be_eval.py @@ -66,11 +66,15 @@ def get_config(runlocal=''): config.experiment_name = 'cityscapes_segmenter_be_eval' # Dataset. - config.dataset_name = 'cityscapes' + config.dataset_name = 'robust_segvit_segmentation' config.dataset_configs = ml_collections.ConfigDict() config.dataset_configs.target_size = (1024, 2048) config.dataset_configs.train_split = 'train' - config.dataset_configs.dataset_name = '' # name of ood dataset to evaluate + config.dataset_configs.name = 'cityscapes' # name of dataset to evaluate + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 # Model. config.model_name = 'segvit' diff --git a/experimental/robust_segvit/configs/cityscapes/deterministic.py b/experimental/robust_segvit/configs/cityscapes/deterministic.py index 4e922550c..3437c5d15 100644 --- a/experimental/robust_segvit/configs/cityscapes/deterministic.py +++ b/experimental/robust_segvit/configs/cityscapes/deterministic.py @@ -74,11 +74,15 @@ def get_config(runlocal=''): config.experiment_name = 'cityscapes_segmenter_pretrained' # Dataset. 
- config.dataset_name = 'cityscapes' + config.dataset_name = 'robust_segvit_segmentation' config.dataset_configs = ml_collections.ConfigDict() config.dataset_configs.target_size = target_size config.dataset_configs.train_split = 'train' - config.dataset_configs.dataset_name = '' # name of ood dataset to evaluate + config.dataset_configs.name = 'cityscapes' # name of dataset to evaluate + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 # Model. config.model_name = 'segvit' diff --git a/experimental/robust_segvit/configs/cityscapes/deterministic_eval.py b/experimental/robust_segvit/configs/cityscapes/deterministic_eval.py index d028957a8..363761709 100644 --- a/experimental/robust_segvit/configs/cityscapes/deterministic_eval.py +++ b/experimental/robust_segvit/configs/cityscapes/deterministic_eval.py @@ -68,11 +68,15 @@ def get_config(runlocal=''): config.experiment_name = 'cityscapes_segmenter_eval' # Dataset. - config.dataset_name = 'cityscapes' + config.dataset_name = 'robust_segvit_segmentation' config.dataset_configs = ml_collections.ConfigDict() config.dataset_configs.target_size = (1024, 2048) config.dataset_configs.train_split = 'train' - config.dataset_configs.dataset_name = '' # name of ood dataset to evaluate + config.dataset_configs.name = 'cityscapes' # name of dataset to evaluate + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 # Model. 
config.model_name = 'segvit' diff --git a/experimental/robust_segvit/configs/cityscapes/deterministic_seeds.py b/experimental/robust_segvit/configs/cityscapes/deterministic_seeds.py index 10629b9ce..c3c0de27b 100644 --- a/experimental/robust_segvit/configs/cityscapes/deterministic_seeds.py +++ b/experimental/robust_segvit/configs/cityscapes/deterministic_seeds.py @@ -74,11 +74,15 @@ def get_config(runlocal=''): config.experiment_name = 'cityscapes_segmenter_seeds' # Dataset. - config.dataset_name = 'cityscapes' + config.dataset_name = 'robust_segvit_segmentation' config.dataset_configs = ml_collections.ConfigDict() config.dataset_configs.target_size = target_size config.dataset_configs.train_split = 'train' - config.dataset_configs.dataset_name = '' # name of ood dataset to evaluate + config.dataset_configs.name = 'cityscapes' # name of dataset to evaluate + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 # Model. config.model_name = 'segvit' diff --git a/experimental/robust_segvit/configs/cityscapes/gp.py b/experimental/robust_segvit/configs/cityscapes/gp.py index 1de85fb22..5fd68e7ab 100644 --- a/experimental/robust_segvit/configs/cityscapes/gp.py +++ b/experimental/robust_segvit/configs/cityscapes/gp.py @@ -74,11 +74,15 @@ def get_config(runlocal=''): config.experiment_name = 'cityscapes_segmenter_gp_hyper' # Dataset. 
- config.dataset_name = 'cityscapes' + config.dataset_name = 'robust_segvit_segmentation' config.dataset_configs = ml_collections.ConfigDict() config.dataset_configs.target_size = target_size config.dataset_configs.train_split = 'train' - config.dataset_configs.dataset_name = '' # name of ood dataset to evaluate + config.dataset_configs.name = 'cityscapes' # name of dataset to evaluate + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 # Model. config.model_name = 'segvit' diff --git a/experimental/robust_segvit/configs/cityscapes/gp_eval.py b/experimental/robust_segvit/configs/cityscapes/gp_eval.py index db0438778..d5836360f 100644 --- a/experimental/robust_segvit/configs/cityscapes/gp_eval.py +++ b/experimental/robust_segvit/configs/cityscapes/gp_eval.py @@ -68,11 +68,15 @@ def get_config(runlocal=''): config.experiment_name = 'cityscapes_segmenter_gp_eval' # Dataset. - config.dataset_name = 'cityscapes' + config.dataset_name = 'robust_segvit_segmentation' config.dataset_configs = ml_collections.ConfigDict() config.dataset_configs.target_size = (1024, 2048) config.dataset_configs.train_split = 'train' - config.dataset_configs.dataset_name = '' # name of ood dataset to evaluate + config.dataset_configs.name = 'cityscapes' # name of dataset to evaluate + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 # Model. 
config.model_name = 'segvit' diff --git a/experimental/robust_segvit/configs/cityscapes/gp_seeds.py b/experimental/robust_segvit/configs/cityscapes/gp_seeds.py index 04d9f24d3..f43d397b0 100644 --- a/experimental/robust_segvit/configs/cityscapes/gp_seeds.py +++ b/experimental/robust_segvit/configs/cityscapes/gp_seeds.py @@ -74,11 +74,15 @@ def get_config(runlocal=''): config.experiment_name = 'cityscapes_segmenter_gp_seeds' # Dataset. - config.dataset_name = 'cityscapes' + config.dataset_name = 'robust_segvit_segmentation' config.dataset_configs = ml_collections.ConfigDict() config.dataset_configs.target_size = target_size config.dataset_configs.train_split = 'train' - config.dataset_configs.dataset_name = '' # name of ood dataset to evaluate + config.dataset_configs.name = 'cityscapes' # name of dataset to evaluate + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 # Model. config.model_name = 'segvit' diff --git a/experimental/robust_segvit/configs/cityscapes/het.py b/experimental/robust_segvit/configs/cityscapes/het.py index 064b8877b..c2ae8eeab 100644 --- a/experimental/robust_segvit/configs/cityscapes/het.py +++ b/experimental/robust_segvit/configs/cityscapes/het.py @@ -74,11 +74,15 @@ def get_config(runlocal=''): config.experiment_name = 'cityscapes_segmenter_het_base' # Dataset. 
- config.dataset_name = 'cityscapes' + config.dataset_name = 'robust_segvit_segmentation' config.dataset_configs = ml_collections.ConfigDict() config.dataset_configs.target_size = target_size config.dataset_configs.train_split = 'train' - config.dataset_configs.dataset_name = '' # name of ood dataset to evaluate + config.dataset_configs.name = 'cityscapes' # name of dataset to evaluate + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 # Model. config.model_name = 'segvit' diff --git a/experimental/robust_segvit/configs/cityscapes/het_eval.py b/experimental/robust_segvit/configs/cityscapes/het_eval.py index f9c2863fa..4038e5688 100644 --- a/experimental/robust_segvit/configs/cityscapes/het_eval.py +++ b/experimental/robust_segvit/configs/cityscapes/het_eval.py @@ -70,11 +70,15 @@ def get_config(runlocal=''): config.experiment_name = 'cityscapes_segmenter_het_eval' # Dataset. - config.dataset_name = 'cityscapes' + config.dataset_name = 'robust_segvit_segmentation' config.dataset_configs = ml_collections.ConfigDict() config.dataset_configs.target_size = (1024, 2048) config.dataset_configs.train_split = 'train' - config.dataset_configs.dataset_name = '' # name of ood dataset to evaluate + config.dataset_configs.name = 'cityscapes' # name of dataset to evaluate + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 # Model. 
config.model_name = 'segvit' diff --git a/experimental/robust_segvit/configs/cityscapes/torch_eval.py b/experimental/robust_segvit/configs/cityscapes/torch_eval.py index 62d2a7a85..532e9fe21 100644 --- a/experimental/robust_segvit/configs/cityscapes/torch_eval.py +++ b/experimental/robust_segvit/configs/cityscapes/torch_eval.py @@ -45,11 +45,15 @@ def get_config(runlocal=''): config.experiment_name = 'cityscapes_segmenter_torch_eval' # Dataset. - config.dataset_name = 'cityscapes' + config.dataset_name = 'robust_segvit_segmentation' config.dataset_configs = ml_collections.ConfigDict() config.dataset_configs.target_size = (1024, 2048) config.dataset_configs.train_split = 'train' - config.dataset_configs.dataset_name = '' # name of ood dataset to evaluate + config.dataset_configs.name = 'cityscapes' # name of dataset to evaluate + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 # Model. config.model_name = 'segvit' diff --git a/experimental/robust_segvit/configs/cityscapes/toy_model.py b/experimental/robust_segvit/configs/cityscapes/toy_model.py index 5fc8107c1..ba2147c5d 100644 --- a/experimental/robust_segvit/configs/cityscapes/toy_model.py +++ b/experimental/robust_segvit/configs/cityscapes/toy_model.py @@ -44,11 +44,15 @@ def get_config(runlocal=''): config.experiment_name = 'cityscapes_segmenter_toy_model' # Dataset. 
- config.dataset_name = 'cityscapes' + config.dataset_name = 'robust_segvit_segmentation' config.dataset_configs = ml_collections.ConfigDict() config.dataset_configs.target_size = target_size config.dataset_configs.train_split = 'train[:5%]' - config.dataset_configs.dataset_name = '' # name of ood dataset to evaluate + config.dataset_configs.name = 'cityscapes' # name of dataset to evaluate + config.dataset_configs.train_target_size = config.dataset_configs.get_ref( + 'target_size') + config.dataset_configs.denoise = None + config.dataset_configs.use_timestep = 0 # Model. config.model_name = 'segvit' diff --git a/experimental/robust_segvit/configs/cityscapes/toy_model_eval.py b/experimental/robust_segvit/configs/cityscapes/toy_model_eval.py index 3dd70fcff..83965106e 100644 --- a/experimental/robust_segvit/configs/cityscapes/toy_model_eval.py +++ b/experimental/robust_segvit/configs/cityscapes/toy_model_eval.py @@ -65,7 +65,6 @@ def get_config(runlocal=''): config.dataset_configs.train_split = 'train[:16]' config.dataset_configs.validation_split = 'validation[:16]' config.dataset_configs.name = 'cityscapes' # name of dataset to evaluate - config.dataset_configs.train_target_size = config.dataset_configs.get_ref( 'target_size') config.dataset_configs.denoise = None From bed1293fd6f89c86bba78d05a83256b3209d7575 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Wed, 9 Nov 2022 20:33:18 -0500 Subject: [PATCH 136/150] use updated data loader for all datasets --- experimental/robust_segvit/custom_segmentation_trainer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index 30946a9e6..0c11b53c7 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -1288,9 +1288,9 @@ def evaluate_cityscapes_c( accuracy_per_corruption = {} prefix = 'cityc' - for
corruption in cityscapes_variants.CITYSCAPES_C_CORRUPTIONS: + for corruption in datasets_info.CITYSCAPES_C_CORRUPTIONS: local_list = [] # list to compute macro average per corruption - for severity in cityscapes_variants.CITYSCAPES_C_SEVERITIES: + for severity in datasets_info.CITYSCAPES_C_SEVERITIES: with ood_config.unlocked(): ood_config.dataset_configs.name = f'cityscapes_c_{corruption}_{severity}' @@ -1372,7 +1372,7 @@ def evaluate_fishyscapes( device_count = jax.device_count() accuracy_per_corruption = {} prefix = 'fishyscapes' - for corruption in cityscapes_variants.FISHYSCAPES_CORRUPTIONS: + for corruption in datasets_info.FISHYSCAPES_CORRUPTIONS: with ood_config.unlocked(): ood_config.dataset_configs.name = f'fishyscapes/{corruption}' From 1be3cecb4f012297c8d923cefcd6cda863f6d756 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Thu, 10 Nov 2022 01:53:30 -0500 Subject: [PATCH 137/150] add street_hazards_corrupted --- .../custom_segmentation_trainer.py | 96 ++++++++++++++++++- 1 file changed, 94 insertions(+), 2 deletions(-) diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index 0c11b53c7..3aa328869 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -1157,7 +1157,9 @@ def evaluate_ood_step( global_unc_metrics_fn = model.get_global_unc_metrics_fn() # pytype: disable=attribute-error eval_ood_covariate = {'cityscapes_c': evaluate_cityscapes_c, - 'ade20k_ind_c': evaluate_ade20k_corrupted,} + 'ade20k_ind_c': evaluate_ade20k_corrupted, + 'street_hazards_c': evaluate_street_hazards_corrupted, + } # TODO(kellybuchanan): merge data sources. # The form of the ind dataset name depends on the source of the data. 
@@ -1173,6 +1175,9 @@ def evaluate_ood_step( elif any('ade20k' in ind_name for ind_name in ind_names): logging.info('Loading Ade20k_ind_c') ood_dataset = 'ade20k_ind_c' + elif any('street' in ind_name for ind_name in ind_names): + logging.info('Loading street_hazards_c') + ood_dataset = 'street_hazards_c' else: logging.info('OOD Covariate shift dataset is not implemented') ood_dataset = None @@ -1529,7 +1534,7 @@ def evaluate_ade20k_corrupted( prefix = 'ade20k_ind_c' for corruption in datasets_info.ADE20K_C_CORRUPTIONS: local_list = [] # list to compute macro average per corruption - for severity in range(1, 6): + for severity in datasets_info.ADE20K_C_SEVERITIES: with ood_config.unlocked(): ood_config.dataset_configs.name = f'ade20k_ind_c_{corruption}_{severity}' @@ -1573,6 +1578,93 @@ def evaluate_ade20k_corrupted( return eval_summary +def evaluate_street_hazards_corrupted( + train_state: train_utils.TrainState, + config: ml_collections.ConfigDict, + rng: Any, + eval_step_pmapped: Any, + writer: metric_writers.MetricWriter, + lead_host: Any, + global_metrics_fn: Any, + global_unc_metrics_fn: Any, + workdir : str, +) -> Dict[str, Any]: + """Evaluate StreetHazards-C dataset. + + Args: + train_state: train state. + config: experiment configuration. + rng: jax rng. + eval_step_pmapped: eval state + writer: CLU metrics writer instance. + lead_host: Evaluate global metrics on one of the hosts (lead_host) given + intermediate values collected from all hosts. + global_metrics_fn: global metrics to evaluate. + global_unc_metrics_fn: global uncertainty metrics to evaluate. 
+ Returns: + eval_summary: summary evaluation + """ + # Load dataset + # set resource limit to debug in mac osx + # (see https://github.com/tensorflow/datasets/issues/1441) + if jax.process_index() == 0 and sys.platform == 'darwin': + low, high = resource.getrlimit(resource.RLIMIT_NOFILE) + resource.setrlimit(resource.RLIMIT_NOFILE, (low, high)) + + # update config: + ood_config = ml_collections.ConfigDict() + ood_config.update(**config) + ood_config.update({'dataset_name': 'robust_segvit_variants'}) + + # Calculate metrics per corruption. + accuracy_per_corruption = {} + prefix = 'street_hazards_c' + for corruption in datasets_info.STREETHAZARDS_C_CORRUPTIONS: + local_list = [] # list to compute macro average per corruption + for severity in datasets_info.STREETHAZARDS_C_SEVERITIES: + + with ood_config.unlocked(): + ood_config.dataset_configs.name = f'street_hazards_c_{corruption}_{severity}' + + data_rng, rng = jax.random.split(rng) + dataset = train_utils.get_dataset(ood_config, data_rng) + dataset.meta_data['prefix'] = prefix + f'/{corruption}/{severity}/valid' + + eval_summary = evaluate( + train_state=train_state, + dataset=dataset, + config=ood_config, + step=0, + eval_step_pmapped=eval_step_pmapped, + writer=writer, + lead_host=lead_host, + global_metrics_fn=global_metrics_fn, + global_unc_metrics_fn=global_unc_metrics_fn, + prefix=dataset.meta_data['prefix'], + workdir=workdir, + ) + + local_list.append(eval_summary) + + accuracy_per_corruption[corruption] = eval_utils.average_list_of_dicts( + local_list) + + ade20k_c_metrics = eval_utils.average_list_of_dicts( + accuracy_per_corruption.values()) + + # append name to metrics + key_separator = '_' + avg_corrupted_metrics = { + key_separator.join((prefix + '/valid', key)): val + for key, val in ade20k_c_metrics.items() + } + # update metrics + eval_summary.update(avg_corrupted_metrics) + writer.write_scalars(0, avg_corrupted_metrics) + writer.flush() + return eval_summary + + def 
evaluate_street_hazards_ood_open( train_state: train_utils.TrainState, config: ml_collections.ConfigDict, From 54c2d934f590bffcc6b574bdcad4f134b200dffd Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Sat, 12 Nov 2022 16:24:10 -0500 Subject: [PATCH 138/150] update config to use msp for default ood metric --- experimental/robust_segvit/configs/ade20k_ind/be.py | 2 +- .../robust_segvit/configs/ade20k_ind/deterministic.py | 2 +- experimental/robust_segvit/configs/ade20k_ind/gp.py | 2 +- experimental/robust_segvit/configs/ade20k_ind/het.py | 2 +- experimental/robust_segvit/configs/cityscapes/be.py | 2 +- .../robust_segvit/configs/cityscapes/deterministic.py | 2 +- experimental/robust_segvit/configs/cityscapes/gp.py | 2 +- experimental/robust_segvit/configs/cityscapes/het.py | 2 +- experimental/robust_segvit/configs/street_hazards/be.py | 2 +- .../robust_segvit/configs/street_hazards/deterministic.py | 2 +- experimental/robust_segvit/configs/street_hazards/gp.py | 2 +- experimental/robust_segvit/configs/street_hazards/het.py | 6 +++--- 12 files changed, 14 insertions(+), 14 deletions(-) diff --git a/experimental/robust_segvit/configs/ade20k_ind/be.py b/experimental/robust_segvit/configs/ade20k_ind/be.py index 6edb44651..e0410ec30 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/be.py +++ b/experimental/robust_segvit/configs/ade20k_ind/be.py @@ -187,7 +187,7 @@ def get_config(runlocal=''): config.eval_robustness_configs = ml_collections.ConfigDict() config.eval_robustness_configs.auc_online = True - config.eval_robustness_configs.method_name = 'mlogit' + config.eval_robustness_configs.method_name = 'msp' # wandb.ai configurations. 
config.use_wandb = False diff --git a/experimental/robust_segvit/configs/ade20k_ind/deterministic.py b/experimental/robust_segvit/configs/ade20k_ind/deterministic.py index 2025e027c..33883665a 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/deterministic.py +++ b/experimental/robust_segvit/configs/ade20k_ind/deterministic.py @@ -180,7 +180,7 @@ def get_config(runlocal=''): config.eval_robustness_configs = ml_collections.ConfigDict() config.eval_robustness_configs.auc_online = True - config.eval_robustness_configs.method_name = 'mlogit' + config.eval_robustness_configs.method_name = 'msp' # wandb.ai configurations. config.use_wandb = False diff --git a/experimental/robust_segvit/configs/ade20k_ind/gp.py b/experimental/robust_segvit/configs/ade20k_ind/gp.py index 41eef64ab..bd022cbc6 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/gp.py +++ b/experimental/robust_segvit/configs/ade20k_ind/gp.py @@ -193,7 +193,7 @@ def get_config(runlocal=''): config.eval_robustness_configs = ml_collections.ConfigDict() config.eval_robustness_configs.auc_online = True - config.eval_robustness_configs.method_name = 'mlogit' + config.eval_robustness_configs.method_name = 'msp' # wandb.ai configurations. config.use_wandb = False diff --git a/experimental/robust_segvit/configs/ade20k_ind/het.py b/experimental/robust_segvit/configs/ade20k_ind/het.py index 3ec6ff989..b85e701ff 100644 --- a/experimental/robust_segvit/configs/ade20k_ind/het.py +++ b/experimental/robust_segvit/configs/ade20k_ind/het.py @@ -193,7 +193,7 @@ def get_config(runlocal=''): config.eval_robustness_configs = ml_collections.ConfigDict() config.eval_robustness_configs.auc_online = True - config.eval_robustness_configs.method_name = 'mlogit' + config.eval_robustness_configs.method_name = 'msp' # wandb.ai configurations. 
config.use_wandb = False diff --git a/experimental/robust_segvit/configs/cityscapes/be.py b/experimental/robust_segvit/configs/cityscapes/be.py index ed642b603..2bf6586f6 100644 --- a/experimental/robust_segvit/configs/cityscapes/be.py +++ b/experimental/robust_segvit/configs/cityscapes/be.py @@ -165,7 +165,7 @@ def get_config(runlocal=''): config.eval_robustness_configs = ml_collections.ConfigDict() config.eval_robustness_configs.auc_online = True - config.eval_robustness_configs.method_name = 'mlogit' + config.eval_robustness_configs.method_name = 'msp' # wandb.ai configurations. config.use_wandb = False diff --git a/experimental/robust_segvit/configs/cityscapes/deterministic.py b/experimental/robust_segvit/configs/cityscapes/deterministic.py index 3437c5d15..56b9c290c 100644 --- a/experimental/robust_segvit/configs/cityscapes/deterministic.py +++ b/experimental/robust_segvit/configs/cityscapes/deterministic.py @@ -163,7 +163,7 @@ def get_config(runlocal=''): config.eval_robustness_configs = ml_collections.ConfigDict() config.eval_robustness_configs.auc_online = True - config.eval_robustness_configs.method_name = 'mlogit' + config.eval_robustness_configs.method_name = 'msp' # wandb.ai configurations. config.use_wandb = False diff --git a/experimental/robust_segvit/configs/cityscapes/gp.py b/experimental/robust_segvit/configs/cityscapes/gp.py index 5fd68e7ab..e9ac3c654 100644 --- a/experimental/robust_segvit/configs/cityscapes/gp.py +++ b/experimental/robust_segvit/configs/cityscapes/gp.py @@ -176,7 +176,7 @@ def get_config(runlocal=''): config.eval_robustness_configs = ml_collections.ConfigDict() config.eval_robustness_configs.auc_online = True - config.eval_robustness_configs.method_name = 'mlogit' + config.eval_robustness_configs.method_name = 'msp' # wandb.ai configurations. 
config.use_wandb = False diff --git a/experimental/robust_segvit/configs/cityscapes/het.py b/experimental/robust_segvit/configs/cityscapes/het.py index c2ae8eeab..7d6b8f552 100644 --- a/experimental/robust_segvit/configs/cityscapes/het.py +++ b/experimental/robust_segvit/configs/cityscapes/het.py @@ -177,7 +177,7 @@ def get_config(runlocal=''): config.eval_robustness_configs = ml_collections.ConfigDict() config.eval_robustness_configs.auc_online = True - config.eval_robustness_configs.method_name = 'mlogit' + config.eval_robustness_configs.method_name = 'msp' # wandb.ai configurations. config.use_wandb = False diff --git a/experimental/robust_segvit/configs/street_hazards/be.py b/experimental/robust_segvit/configs/street_hazards/be.py index 7554ae85d..2f6b9f7a9 100644 --- a/experimental/robust_segvit/configs/street_hazards/be.py +++ b/experimental/robust_segvit/configs/street_hazards/be.py @@ -190,7 +190,7 @@ def get_config(runlocal=''): config.eval_robustness_configs = ml_collections.ConfigDict() config.eval_robustness_configs.auc_online = True - config.eval_robustness_configs.method_name = 'mlogit' + config.eval_robustness_configs.method_name = 'msp' # wandb.ai configurations. config.use_wandb = False diff --git a/experimental/robust_segvit/configs/street_hazards/deterministic.py b/experimental/robust_segvit/configs/street_hazards/deterministic.py index ce22f02c5..0c044eae9 100644 --- a/experimental/robust_segvit/configs/street_hazards/deterministic.py +++ b/experimental/robust_segvit/configs/street_hazards/deterministic.py @@ -182,7 +182,7 @@ def get_config(runlocal=''): config.eval_robustness_configs = ml_collections.ConfigDict() config.eval_robustness_configs.auc_online = True - config.eval_robustness_configs.method_name = 'mlogit' + config.eval_robustness_configs.method_name = 'msp' # wandb.ai configurations. 
config.use_wandb = False diff --git a/experimental/robust_segvit/configs/street_hazards/gp.py b/experimental/robust_segvit/configs/street_hazards/gp.py index 9daccba96..ba4c8bc4e 100644 --- a/experimental/robust_segvit/configs/street_hazards/gp.py +++ b/experimental/robust_segvit/configs/street_hazards/gp.py @@ -194,7 +194,7 @@ def get_config(runlocal=''): config.eval_robustness_configs = ml_collections.ConfigDict() config.eval_robustness_configs.auc_online = True - config.eval_robustness_configs.method_name = 'nmlogit' + config.eval_robustness_configs.method_name = 'msp' # wandb.ai configurations. config.use_wandb = False diff --git a/experimental/robust_segvit/configs/street_hazards/het.py b/experimental/robust_segvit/configs/street_hazards/het.py index 0a801eaf3..5493e2ad3 100644 --- a/experimental/robust_segvit/configs/street_hazards/het.py +++ b/experimental/robust_segvit/configs/street_hazards/het.py @@ -190,13 +190,13 @@ def get_config(runlocal=''): config.eval_configs = ml_collections.ConfigDict() config.eval_configs.mode = 'standard' config.eval_mode = False - config.eval_covariate_shift = False - config.eval_label_shift = False + config.eval_covariate_shift = True + config.eval_label_shift = True config.model.input_shape = target_size config.eval_robustness_configs = ml_collections.ConfigDict() config.eval_robustness_configs.auc_online = True - config.eval_robustness_configs.method_name = 'mlogit' + config.eval_robustness_configs.method_name = 'msp' # wandb.ai configurations. 
config.use_wandb = False From a1793ae5eded69a9f12fbb7aac623777ffe92c09 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Sat, 12 Nov 2022 16:25:06 -0500 Subject: [PATCH 139/150] add script to train plex model with different seeds --- experimental/robust_segvit/run_train_seed.sh | 42 ++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 experimental/robust_segvit/run_train_seed.sh diff --git a/experimental/robust_segvit/run_train_seed.sh b/experimental/robust_segvit/run_train_seed.sh new file mode 100644 index 000000000..c9b645f2e --- /dev/null +++ b/experimental/robust_segvit/run_train_seed.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# call eval model using wandb + +# Debug on Mac OS X platform +use_gpu=False +if [ "$(uname)" = "Darwin" ] ; then +tpu=False +num_cores=1 +batch_size=5 +elif [ "$(uname)" = "Linux" ]; then +tpu='local' +num_cores=8 +batch_size=16 +fi + +# default config for eval +use_wandb=True + +for dataset in "cityscapes" #"ade20k_ind" "street_hazards" +do +for model in "deterministic" "gp" "be" "het" +do +for rng_seed in 1 +do +base_output_dir="gs://ub-ekb/segmenter/${dataset}" +config_file="configs/${dataset}/${model}.py" +run_name="${model}_eval" +output_dir="${base_output_dir}/${run_name}" +python deterministic.py \ +--output_dir=${output_dir} \ +--num_cores=$num_cores \ +--use_gpu=$use_gpu \ +--config=${config_file} \ +--config.batch_size=${batch_size} \ +--config.use_wandb=${use_wandb} \ +--config.rng_seed=${rng_seed} \ +--tpu=${tpu} \ + +done +done +done From aec65b5b9ad11979c949c85087f377f79b5eb789 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Sat, 12 Nov 2022 16:29:17 -0500 Subject: [PATCH 140/150] update order for call to config --- experimental/robust_segvit/run_train_seed.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/robust_segvit/run_train_seed.sh b/experimental/robust_segvit/run_train_seed.sh index c9b645f2e..0625dcc43 100644 --- a/experimental/robust_segvit/run_train_seed.sh +++ 
b/experimental/robust_segvit/run_train_seed.sh @@ -19,7 +19,7 @@ use_wandb=True for dataset in "cityscapes" #"ade20k_ind" "street_hazards" do -for model in "deterministic" "gp" "be" "het" +for model in "deterministic" "gp" "het" "be" do for rng_seed in 1 do From ff87a4ea10afb6d5246e88e3dbbd739f8ea0bf99 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Sat, 12 Nov 2022 18:01:49 -0500 Subject: [PATCH 141/150] add bash script to train model with runlocal --- .../robust_segvit/run_deterministic_local.sh | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100755 experimental/robust_segvit/run_deterministic_local.sh diff --git a/experimental/robust_segvit/run_deterministic_local.sh b/experimental/robust_segvit/run_deterministic_local.sh new file mode 100755 index 000000000..427f4cce8 --- /dev/null +++ b/experimental/robust_segvit/run_deterministic_local.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# train toy model using wandb +#wandb sweep run_toy_mac.yaml +# before make sure we can run code vanilla version: + +DATASET='ade20k_ind' # or cityscapes +DATASET='street_hazards' +DATASET='cityscapes' + +base_output_dir="gs://ub-ekb/segmenter/${DATASET}/deterministic" + +# Debug on Mac OS X platform +use_gpu=False +if [ "$(uname)" = "Darwin" ] ; then +tpu=False +num_cores=1 +batch_size=5 +elif [ "$(uname)" = "Linux" ]; then +tpu='local' +num_cores=8 +batch_size=8 +fi + +config_file="configs/${DATASET}/deterministic.py:runlocal" +run_name="local" +output_dir="${base_output_dir}/${run_name}" +python deterministic.py \ +--output_dir=${output_dir} \ +--num_cores=$num_cores \ +--use_gpu=$use_gpu \ +--config=${config_file} \ +--config.batch_size=${batch_size} \ +--tpu=${tpu} \ From ce04a6e23ac36badb0ea2788ea7a5bff9778866e Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Sat, 12 Nov 2022 18:11:30 -0500 Subject: [PATCH 142/150] include wandb in debug call --- experimental/robust_segvit/run_deterministic_local.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/experimental/robust_segvit/run_deterministic_local.sh b/experimental/robust_segvit/run_deterministic_local.sh index 427f4cce8..19c7a2d6a 100755 --- a/experimental/robust_segvit/run_deterministic_local.sh +++ b/experimental/robust_segvit/run_deterministic_local.sh @@ -12,6 +12,7 @@ base_output_dir="gs://ub-ekb/segmenter/${DATASET}/deterministic" # Debug on Mac OS X platform use_gpu=False + if [ "$(uname)" = "Darwin" ] ; then tpu=False num_cores=1 @@ -22,6 +23,8 @@ num_cores=8 batch_size=8 fi +use_wandb=True + config_file="configs/${DATASET}/deterministic.py:runlocal" run_name="local" output_dir="${base_output_dir}/${run_name}" @@ -31,4 +34,5 @@ python deterministic.py \ --use_gpu=$use_gpu \ --config=${config_file} \ --config.batch_size=${batch_size} \ +--config.use_wandb=${use_wandb} \ --tpu=${tpu} \ From 653a6b0bfff5069edde30b4a08f1a8cc9aedfbef Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Sat, 12 Nov 2022 18:49:04 -0500 Subject: [PATCH 143/150] add wandb option to code call --- experimental/robust_segvit/run_toy_mac.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/experimental/robust_segvit/run_toy_mac.sh b/experimental/robust_segvit/run_toy_mac.sh index c8ec065eb..c0a440095 100755 --- a/experimental/robust_segvit/run_toy_mac.sh +++ b/experimental/robust_segvit/run_toy_mac.sh @@ -41,6 +41,7 @@ fi # Set configuration file # ---------------------------------------------------- config_file="configs/${DATASET}/toy_model.py:runlocal" +use_wandb=True # ---------------------------------------------------- # Call model trainer. 
@@ -51,4 +52,5 @@ python deterministic.py \ --use_gpu=$use_gpu \ --config=${config_file} \ --config.batch_size=${batch_size} \ +--config.use_wandb=${use_wandb} \ --tpu=${tpu} \ From e0f92fa82a74669e0cb577ae9464044e1fd41dda Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Sat, 12 Nov 2022 22:01:43 -0500 Subject: [PATCH 144/150] update multihost calculation for auc --- experimental/robust_segvit/metrics_multihost.py | 15 +++++---------- .../robust_segvit/metrics_multihost_test.py | 6 +++--- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/experimental/robust_segvit/metrics_multihost.py b/experimental/robust_segvit/metrics_multihost.py index 5a81b5716..548dcae23 100644 --- a/experimental/robust_segvit/metrics_multihost.py +++ b/experimental/robust_segvit/metrics_multihost.py @@ -93,7 +93,7 @@ def gather_metrics(self): num_thresholds=self.num_thresholds), self.auc) - return self.auc.result().numpy() + return self.auc.result() class ComputeOODAUCMetric: @@ -127,15 +127,10 @@ def calculate_and_update_scores(self, logits, label, sample_weight, **kwargs): def gather_metrics(self): auc_state = keras_auc_to_arrays(self.auc) - # Gather the data across all hosts. - all_auc_states = multihost_utils.process_allgather(auc_state) - - # Below we pick the first device. 
- self.auc = arrays_to_keras_auc(*combine_states(all_auc_states, - num_thresholds=self.num_thresholds), - self.auc) + # Gather the metrics: + self.auc = host_all_gather_metrics(self.auc) - return self.auc.result().numpy() + return self.auc.result() class ComputeScoreAUCMetric: @@ -174,4 +169,4 @@ def gather_metrics(self): num_thresholds=self.num_thresholds), self.auc) - return self.auc.result().numpy() + return self.auc.result() diff --git a/experimental/robust_segvit/metrics_multihost_test.py b/experimental/robust_segvit/metrics_multihost_test.py index 6efddf7af..207e91f94 100644 --- a/experimental/robust_segvit/metrics_multihost_test.py +++ b/experimental/robust_segvit/metrics_multihost_test.py @@ -63,7 +63,7 @@ def test_ComputeAUCMetric(self, seed, masked_fraction): sample_weight=fake_batch['batch_mask'], ) - auc_result = auc_roc.gather_metrics() + auc_result = auc_roc.gather_metrics().numpy() # Numpy result: if np.all(batch_np['batch_mask'] == 0): @@ -121,7 +121,7 @@ def test_ComputeOODAUCMetric(self, seed, masked_fraction): sample_weight=fake_batch['batch_mask'], *ood_kwargs, ) - auc_result = auc_roc.gather_metrics() + auc_result = auc_roc.gather_metrics().numpy() # Numpy result: if np.all(batch_np['batch_mask'] == 0): @@ -180,7 +180,7 @@ def test_ComputeScoreAUCMetric(self, seed, masked_fraction): sample_weight=fake_batch['batch_mask'], **ood_kwargs, ) - auc_result = auc_roc.gather_metrics() + auc_result = auc_roc.gather_metrics().numpy() # Numpy result: if np.all(batch_np['batch_mask'] == 0): From b33beb47180b7ef085e4ad96d34996f0504baed6 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Sat, 12 Nov 2022 22:02:05 -0500 Subject: [PATCH 145/150] add del code to free memory --- experimental/robust_segvit/custom_segmentation_trainer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index 3aa328869..89961dc2e 100644 --- 
a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -257,6 +257,7 @@ def evaluate(train_state: train_utils.TrainState, writer.flush() # Free some memory + del example_viz, images, e_batch, e_predictions, e_logits, e_metrics, eval_batch del eval_metrics del eval_global_metrics_summary del eval_all_confusion_mats @@ -890,6 +891,7 @@ def train( extra_training_logs), writer=writer) + del example_viz, train_metrics, extra_training_logs # Reset metric accumulation for next evaluation cycle. train_metrics, extra_training_logs = [], [] From 9b085c5f0ad44bc337f9df1c25b034d58b5951cc Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Sat, 12 Nov 2022 22:02:47 -0500 Subject: [PATCH 146/150] set toy default code to run cityscapes --- experimental/robust_segvit/run_toy_mac.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/experimental/robust_segvit/run_toy_mac.sh b/experimental/robust_segvit/run_toy_mac.sh index c0a440095..adc4c613c 100755 --- a/experimental/robust_segvit/run_toy_mac.sh +++ b/experimental/robust_segvit/run_toy_mac.sh @@ -8,8 +8,8 @@ # wandb sweep run_toy_mac.yaml DATASET='ade20k_ind' -DATASET='cityscapes' DATASET='street_hazards' +DATASET='cityscapes' # ---------------------------------------------------- # Set directory where outputs should be installed: @@ -42,6 +42,7 @@ fi # ---------------------------------------------------- config_file="configs/${DATASET}/toy_model.py:runlocal" use_wandb=True +eval_covariate_shift=False # ---------------------------------------------------- # Call model trainer. 
@@ -52,5 +53,6 @@ python deterministic.py \ --use_gpu=$use_gpu \ --config=${config_file} \ --config.batch_size=${batch_size} \ +--config.eval_covariate_shift=${eval_covariate_shift} \ --config.use_wandb=${use_wandb} \ --tpu=${tpu} \ From b3a722fdc8085092b67f5a8a0bca00bcc6a6f0c7 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Sat, 12 Nov 2022 22:36:35 -0500 Subject: [PATCH 147/150] update default call for auc calculation --- experimental/robust_segvit/metrics_multihost.py | 1 - experimental/robust_segvit/metrics_multihost_test.py | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/experimental/robust_segvit/metrics_multihost.py b/experimental/robust_segvit/metrics_multihost.py index 548dcae23..3d2953c57 100644 --- a/experimental/robust_segvit/metrics_multihost.py +++ b/experimental/robust_segvit/metrics_multihost.py @@ -125,7 +125,6 @@ def calculate_and_update_scores(self, logits, label, sample_weight, **kwargs): self.auc.update_state(label, ood_score, sample_weight=sample_weight) def gather_metrics(self): - auc_state = keras_auc_to_arrays(self.auc) # Gather the metrics: self.auc = host_all_gather_metrics(self.auc) diff --git a/experimental/robust_segvit/metrics_multihost_test.py b/experimental/robust_segvit/metrics_multihost_test.py index 207e91f94..25ac79c5d 100644 --- a/experimental/robust_segvit/metrics_multihost_test.py +++ b/experimental/robust_segvit/metrics_multihost_test.py @@ -84,7 +84,9 @@ def test_ComputeOODAUCMetric(self, seed, masked_fraction): np.random.seed(seed) num_thresholds = 1000 - ood_kwargs = {} + ood_kwargs = { + 'method_name': 'mlogit', + } # Create test data: num_classes = 2 input_shape = [8, 1, 224, 224] @@ -119,7 +121,7 @@ def test_ComputeOODAUCMetric(self, seed, masked_fraction): auc_roc.calculate_and_update_scores(logits=pred, label=ood_label, sample_weight=fake_batch['batch_mask'], - *ood_kwargs, + **ood_kwargs, ) auc_result = auc_roc.gather_metrics().numpy() From d6c8cd6abb8d7a3f18ebb4548ce0a2bfc4b76694 Mon 
Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Sat, 12 Nov 2022 22:39:16 -0500 Subject: [PATCH 148/150] remove call to computescoreaucmetric --- experimental/robust_segvit/custom_segmentation_trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index 89961dc2e..ca314c2c6 100644 --- a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -54,7 +54,7 @@ import resource import sys import robustness_metrics as rm -from metrics_multihost import ComputeOODAUCMetric, ComputeScoreAUCMetric +from metrics_multihost import ComputeOODAUCMetric from metrics_multihost import host_all_gather_metrics Batch = Dict[str, jnp.ndarray] From 7e82e2f2eb032e9b01607bd315d6c20354a2e13a Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Sat, 12 Nov 2022 22:44:28 -0500 Subject: [PATCH 149/150] remove ood calculation based on -1*score given that tf.keras.auc does not support negative values --- .../robust_segvit/metrics_multihost.py | 38 ------------ .../robust_segvit/metrics_multihost_test.py | 59 ------------------- 2 files changed, 97 deletions(-) diff --git a/experimental/robust_segvit/metrics_multihost.py b/experimental/robust_segvit/metrics_multihost.py index 3d2953c57..ae082fb60 100644 --- a/experimental/robust_segvit/metrics_multihost.py +++ b/experimental/robust_segvit/metrics_multihost.py @@ -131,41 +131,3 @@ def gather_metrics(self): return self.auc.result() - -class ComputeScoreAUCMetric: - """Calculate score based auc metrics across multiple hosts.""" - def __init__(self, curve, num_thresholds=200, summation_method='interpolation',thresholds=None): - self.curve = curve - self.num_thresholds = num_thresholds - self.from_logits = False - self.summation_method = summation_method - self.thresholds = thresholds - self.auc = tf.keras.metrics.AUC(curve=self.curve, - 
from_logits=self.from_logits, - num_thresholds=self.num_thresholds, - summation_method=self.summation_method, - thresholds=self.thresholds) - - def calculate_and_update_scores(self, logits, label, sample_weight, **kwargs): - " label 1 for ood pixel and 0 is otherwise." - conf = - 1 * get_score(logits, **kwargs) - - # skip images where all the pixels are ood or there are no ood pixels - all_pixel_ood = jnp.sum(label*sample_weight) == 1 - no_pixel_ood = jnp.sum(label*sample_weight) == 0 - - if not(all_pixel_ood) and not(no_pixel_ood): - self.auc.update_state(label, conf, sample_weight=sample_weight) - - def gather_metrics(self): - auc_state = keras_auc_to_arrays(self.auc) - - # Gather the data across all hosts. - all_auc_states = multihost_utils.process_allgather(auc_state) - - # Below we pick the first device. - self.auc = arrays_to_keras_auc(*combine_states(all_auc_states, - num_thresholds=self.num_thresholds), - self.auc) - - return self.auc.result() diff --git a/experimental/robust_segvit/metrics_multihost_test.py b/experimental/robust_segvit/metrics_multihost_test.py index 25ac79c5d..fb46d62fd 100644 --- a/experimental/robust_segvit/metrics_multihost_test.py +++ b/experimental/robust_segvit/metrics_multihost_test.py @@ -8,7 +8,6 @@ from metrics_multihost import ComputeAUCMetric from metrics_multihost import ComputeOODAUCMetric -from metrics_multihost import ComputeScoreAUCMetric import sklearn.metrics from ood_metrics import get_ood_score @@ -138,64 +137,6 @@ def test_ComputeOODAUCMetric(self, seed, masked_fraction): self.assertAlmostEqual(auc_result, auc_numpy, places=1) - @parameterized.parameters([(0, 0.0), (1, 0.01), (2, 0.5), (3, 0.99), (4, 1)]) - def test_ComputeScoreAUCMetric(self, seed, masked_fraction): - """Test computation of scored AUC metric.""" - np.random.seed(seed) - num_thresholds = 10000 - summation_method = 'interpolation' - ood_kwargs = {'method_name': 'msp'} - # Create test data: - num_classes = 2 - input_shape = [8, 1, 224, 224] - 
logits_shape = input_shape + [num_classes] - logits_np = np.random.rand(*logits_shape) - # Note: We include label -1, which indicates excluded pixels: - label = np.random.randint(0, num_classes, size=input_shape) - label[:4] = np.argmax(logits_np[:4], axis=-1) # Set half to correct. - - batch_np = { - 'logits': logits_np, - 'label': - label, - 'batch_mask': - (np.random.rand(*input_shape) > masked_fraction) & (label != -1), - } - batch = { - 'logits': jnp.array(logits_np), - 'label': jnp.array(batch_np['label']), - 'batch_mask': jnp.array(batch_np['batch_mask']), - } - - - fake_batches_replicated = jax_utils.replicate([batch]) - - auc_roc = ComputeScoreAUCMetric(curve='ROC', num_thresholds=num_thresholds, - summation_method=summation_method) - - for fake_batch in fake_batches_replicated: - pred = fake_batch['logits'] - ood_label = 1 - fake_batch['label'] - - auc_roc.calculate_and_update_scores(logits=pred, - label=ood_label, - sample_weight=fake_batch['batch_mask'], - **ood_kwargs, - ) - auc_result = auc_roc.gather_metrics().numpy() - - # Numpy result: - if np.all(batch_np['batch_mask'] == 0): - auc_numpy = 0 - else: - labels_negative_ignored = np.maximum(batch_np['label'], 0) - ood_label_np = 1 - labels_negative_ignored - ood_score = get_ood_score(logits_np, **ood_kwargs) - auc_numpy = sklearn.metrics.roc_auc_score(ood_label_np.ravel(), - ood_score.ravel(), - sample_weight=batch_np['batch_mask'].ravel()) - - self.assertAlmostEqual(auc_result, auc_numpy, places=1) if __name__ == '__main__': absltest.main() From 87fc8b0aea8e01e82537309454ad856f0f3134a4 Mon Sep 17 00:00:00 2001 From: Kelly Buchanan Date: Sun, 13 Nov 2022 00:23:43 -0500 Subject: [PATCH 150/150] slim trainer --- .../custom_segmentation_trainer.py | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/experimental/robust_segvit/custom_segmentation_trainer.py b/experimental/robust_segvit/custom_segmentation_trainer.py index ca314c2c6..a7d19690e 100644 --- 
a/experimental/robust_segvit/custom_segmentation_trainer.py +++ b/experimental/robust_segvit/custom_segmentation_trainer.py @@ -234,10 +234,14 @@ def evaluate(train_state: train_utils.TrainState, prefix=prefix, ) + del e_metrics, eval_batch, eval_metrics, eval_global_metrics_summary + del eval_all_confusion_mats + del eval_all_unc_confusion_mats + # Gather uncertainty metrics from all hosts and write value: ece_metric = host_all_gather_metrics(ece_metric) calib_auc = host_all_gather_metrics(calib_auc) - writer.write_scalars(step=step, scalars={'{}_ece'.format(prefix) : ece_metric.result(), + writer.write_scalars(step=step, scalars={'{}_ece'.format(prefix): ece_metric.result(), '{}_calib_auc'.format(prefix): calib_auc.result(), } ) @@ -257,11 +261,7 @@ def evaluate(train_state: train_utils.TrainState, writer.flush() # Free some memory - del example_viz, images, e_batch, e_predictions, e_logits, e_metrics, eval_batch - del eval_metrics - del eval_global_metrics_summary - del eval_all_confusion_mats - del eval_all_unc_confusion_mats + del example_viz, images, e_batch, e_predictions, e_logits, logits return eval_summary @@ -348,8 +348,8 @@ def evaluate_ood( if store_logits: f.close() - eval_summary = {'auroc': float(auc_roc.gather_metrics()), - 'auprc': float(auc_pr.gather_metrics()), + eval_summary = {'auroc': auc_roc.gather_metrics(), + 'auprc': auc_pr.gather_metrics(), } else: @@ -518,12 +518,14 @@ def training_loss_fn(params): logits) # batch_size x h x w x num_classes metrics = metrics_fn(logits, batch) - new_train_state = train_state.replace( # pytype: disable=attribute-error + logits = jnp.argmax(logits, axis=-1) + + train_state = train_state.replace( # pytype: disable=attribute-error global_step=step + 1, optimizer=new_optimizer, model_state=new_model_state, rng=new_rng) - return new_train_state, metrics, lr, jnp.argmax(logits, axis=-1) + return train_state, metrics, lr, logits def eval_step( @@ -885,9 +887,9 @@ def train( train_summary = 
train_utils.log_train_summary( step=step, - train_metrics=jax.tree_map(train_utils.unreplicate_and_get, + train_metrics=jax.tree_util.tree_map(train_utils.unreplicate_and_get, train_metrics), - extra_training_logs=jax.tree_map(train_utils.unreplicate_and_get, + extra_training_logs=jax.tree_util.tree_map(train_utils.unreplicate_and_get, extra_training_logs), writer=writer)