From c5d2ccfd744befd5279716310e3a7641f06c26ec Mon Sep 17 00:00:00 2001 From: schnamo Date: Wed, 18 Feb 2026 17:38:16 +0100 Subject: [PATCH 1/2] changing learning rate back to default of 1e-3; however, we recommend 1e-4 for OPT fine-tuning experiments --- configs/model/electra-for-pretraining.yml | 2 +- configs/model/electra.yml | 2 +- configs/model/electra_pretraining.yml | 2 +- configs/model/electra_tox.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/configs/model/electra-for-pretraining.yml b/configs/model/electra-for-pretraining.yml index 80acd9a1..d9b37515 100644 --- a/configs/model/electra-for-pretraining.yml +++ b/configs/model/electra-for-pretraining.yml @@ -4,7 +4,7 @@ init_args: class_path: chebai.loss.pretraining.ElectraPreLoss out_dim: null optimizer_kwargs: - lr: 1e-4 + lr: 1e-3 config: generator: vocab_size: 1400 diff --git a/configs/model/electra.yml b/configs/model/electra.yml index 34fd4b45..4427715f 100644 --- a/configs/model/electra.yml +++ b/configs/model/electra.yml @@ -2,7 +2,7 @@ class_path: chebai.models.Electra init_args: model_type: classification optimizer_kwargs: - lr: 1e-4 + lr: 1e-3 config: vocab_size: 4400 max_position_embeddings: 1800 diff --git a/configs/model/electra_pretraining.yml b/configs/model/electra_pretraining.yml index f480a792..c0a96443 100644 --- a/configs/model/electra_pretraining.yml +++ b/configs/model/electra_pretraining.yml @@ -2,7 +2,7 @@ class_path: chebai.models.ElectraPre init_args: out_dim: null optimizer_kwargs: - lr: 1e-4 + lr: 1e-3 config: generator: vocab_size: 1400 diff --git a/configs/model/electra_tox.yml b/configs/model/electra_tox.yml index fbba5993..7322a8f3 100644 --- a/configs/model/electra_tox.yml +++ b/configs/model/electra_tox.yml @@ -2,7 +2,7 @@ class_path: chebai.models.Electra init_args: model_type: classification optimizer_kwargs: - lr: 1e-4 + lr: 1e-3 # we recommend 1e-4 for OPT finetuning, however, 1e-3 is the default config: vocab_size: 1400 max_position_embeddings: 
1800 From acc8a9a598c19096d9e64976eb68780a8b83e2a0 Mon Sep 17 00:00:00 2001 From: sfluegel Date: Thu, 19 Feb 2026 21:57:05 +0100 Subject: [PATCH 2/2] delete duplicate config, fix vocab size --- configs/model/electra-for-pretraining.yml | 4 ++-- configs/model/electra300.yml | 3 ++- configs/model/electra_pretraining.yml | 18 ------------------ 3 files changed, 4 insertions(+), 21 deletions(-) delete mode 100644 configs/model/electra_pretraining.yml diff --git a/configs/model/electra-for-pretraining.yml b/configs/model/electra-for-pretraining.yml index d9b37515..21059297 100644 --- a/configs/model/electra-for-pretraining.yml +++ b/configs/model/electra-for-pretraining.yml @@ -7,13 +7,13 @@ init_args: lr: 1e-3 config: generator: - vocab_size: 1400 + vocab_size: 4400 max_position_embeddings: 1800 num_attention_heads: 8 num_hidden_layers: 6 type_vocab_size: 1 discriminator: - vocab_size: 1400 + vocab_size: 4400 max_position_embeddings: 1800 num_attention_heads: 8 num_hidden_layers: 6 diff --git a/configs/model/electra300.yml b/configs/model/electra300.yml index 1eb96aa7..4002551e 100644 --- a/configs/model/electra300.yml +++ b/configs/model/electra300.yml @@ -1,9 +1,10 @@ class_path: chebai.models.Electra init_args: + model_type: classification optimizer_kwargs: lr: 1e-3 config: - vocab_size: 1400 + vocab_size: 4400 max_position_embeddings: 301 num_attention_heads: 8 num_hidden_layers: 6 diff --git a/configs/model/electra_pretraining.yml b/configs/model/electra_pretraining.yml deleted file mode 100644 index c0a96443..00000000 --- a/configs/model/electra_pretraining.yml +++ /dev/null @@ -1,18 +0,0 @@ -class_path: chebai.models.ElectraPre -init_args: - out_dim: null - optimizer_kwargs: - lr: 1e-3 - config: - generator: - vocab_size: 1400 - max_position_embeddings: 1800 - num_attention_heads: 8 - num_hidden_layers: 6 - type_vocab_size: 1 - discriminator: - vocab_size: 1400 - max_position_embeddings: 1800 - num_attention_heads: 8 - num_hidden_layers: 6 - type_vocab_size: 1