2 changes: 1 addition & 1 deletion configs/_cluster/helios.yaml
@@ -21,7 +21,7 @@ infrastructure:
 # export pixi variables
 - 'export PIXI_HOME=$PROJECT_HOME_PATH/pixi'
 - 'export PATH="$HOME/.pixi/bin:$PATH"'
-- 'export XDG_DATA_HOME="PROJECT_HOME_PATH/data"'
+- 'export XDG_DATA_HOME="$PROJECT_HOME_PATH/data"'
 - 'export XDG_CACHE_HOME="$PROJECT_HOME_PATH/cache"'
 - 'export XDG_STATE_HOME="$PROJECT_HOME_PATH/state"'

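The one-character fix above matters more than it looks: without the `$`, the shell exports the literal string `PROJECT_HOME_PATH/data`, so anything that honors `XDG_DATA_HOME` resolves it as a relative path. A minimal startup sanity check one could add (the helper `check_xdg_paths` is illustrative, not part of this repo):

```python
import logging
import os

logger = logging.getLogger(__name__)


def check_xdg_paths() -> None:
    """Warn if an XDG_* variable looks unexpanded (e.g. a missing '$')."""
    for var in ("XDG_DATA_HOME", "XDG_CACHE_HOME", "XDG_STATE_HOME"):
        value = os.environ.get(var)
        # A correctly expanded path is absolute; a literal
        # 'PROJECT_HOME_PATH/data' (missing '$') is relative.
        if value is not None and not os.path.isabs(value):
            logger.warning("%s=%r is not an absolute path", var, value)
```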
2 changes: 1 addition & 1 deletion configs/pc_project/llama_1B_importances.yaml
@@ -56,7 +56,7 @@ apply_functions:
 dataloader: ${trainer.train_dataloader}
 dmodel: ${common.dmodel}
 dff: ${common.dff}
-calibration_dataset_size: 8192 # nvidia used 2k steps of 4k sequence lenght - this is the saturaion poin - longer doesnt improve miningfully
+calibration_dataset_size: 8192 # nvidia used 2k steps of 4k sequence length - this is the saturation point - longer doesn't improve meaningfully
 seq_len: ${common.sequence_length}
 total_batch_size: ${trainer.train_dataloader.total_batch_size}
 n_blocks: ${model.encoder.n_blocks}
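The corrected comment pins down a concrete budget: importance scores saturate around 2k steps of 4k-token sequences, so a larger calibration set buys nothing. How the 8192-sample setting maps onto that depends on batch size; a back-of-the-envelope sketch (the batch size of 4 is an assumption, not taken from the config):

```python
calibration_dataset_size = 8192  # samples, as set in this config
seq_len = 4096                   # 4k tokens per sequence, per the comment
batch_size = 4                   # assumed; not specified here

steps = calibration_dataset_size // batch_size  # 2048, i.e. ~"2k steps"
tokens = calibration_dataset_size * seq_len     # 33,554,432 tokens
print(f"{steps} steps, {tokens:,} calibration tokens")
```

The same fix appears in `llama_8B_importances.yaml` below.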
2 changes: 1 addition & 1 deletion configs/pc_project/llama_8B_importances.yaml
@@ -56,7 +56,7 @@ apply_functions:
 dataloader: ${trainer.train_dataloader}
 dmodel: ${common.dmodel}
 dff: ${common.dff}
-calibration_dataset_size: 8192 # nvidia used 2k steps of 4k sequence lenght - this is the saturaion poin - longer doesnt improve miningfully
+calibration_dataset_size: 8192 # nvidia used 2k steps of 4k sequence length - this is the saturation point - longer doesn't improve meaningfully
 seq_len: ${common.sequence_length}
 total_batch_size: ${trainer.train_dataloader.total_batch_size}
 n_blocks: ${model.encoder.n_blocks}
6 changes: 3 additions & 3 deletions main.py
@@ -70,7 +70,7 @@ def check_env_vars():
     assert int(os.environ["RANK"]) < int(os.environ["WORLD_SIZE"])


-def setup_enviroment():
+def setup_environment():
     if "WORLD_SIZE" not in os.environ:
         logger.warning("WORLD_SIZE is not set, setting it to 1")
         os.environ["WORLD_SIZE"] = "1"
@@ -267,7 +267,7 @@ def initialize_training_components(cfg: OmegaConf, metric_logger=None):
             cfg, model, learning_rate
         )
     elif cfg.trainer.checkpoint.load.type == "nano":
-        # TODO! if you want to apply function on loaded model it does NOT work now, it applies function on newly inintialized model than it loads model weights
+        # TODO! applying a function on a loaded model does NOT work now: it applies the function on the newly initialized model, then it loads the model weights
         model, optimizer, scheduler = get_model_optimizer_scheduler(
             cfg, model, learning_rate
         )
@@ -292,7 +292,7 @@ def initialize_training_components(cfg: OmegaConf, metric_logger=None):


 def run(cfg: OmegaConf, metric_logger=None):
-    setup_enviroment()
+    setup_environment()

     if "distributed" in cfg.trainer and cfg.trainer.distributed is not None:
         distributed_setup()
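The TODO fixed in the second hunk describes an ordering bug worth spelling out: apply-functions currently run against the freshly initialized model, and the checkpoint weights are loaded afterwards, so the load overwrites whatever the functions changed. A sketch of the intended order, with hypothetical names (`apply_fns`, the checkpoint path attribute) since the actual fix is not part of this PR:

```python
import torch


def load_then_apply(cfg, model, apply_fns):
    """Illustrative fix for the TODO: load weights BEFORE applying functions."""
    if cfg.trainer.checkpoint.load.type == "nano":
        # 1. Restore the checkpointed weights first...
        state_dict = torch.load(cfg.trainer.checkpoint.load.path)
        model.load_state_dict(state_dict)
    # 2. ...and only then run the apply-functions, so their effect
    #    (e.g. pruning by importance) is not clobbered by the load.
    for fn in apply_fns:
        fn(model)
    return model
```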
2 changes: 1 addition & 1 deletion src/core/utils.py
@@ -46,7 +46,7 @@ def solve_config_lr(
     config_lr: float,
 ) -> tuple[
     float, float
-]:  # TODO temporary place - move to devinitions eval+ when created
+]:  # TODO temporary place - move to definitions eval+ when created
     ret_lr, ret_exp_lr = None, None
     if config_lr < 1.0:
         ret_lr = config_lr
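The diff collapses the rest of `solve_config_lr`, so only the `config_lr < 1.0` branch is visible. One plausible reading of the convention, as a purely hypothetical sketch (the repo's actual else-branch may differ): values below 1.0 are literal learning rates, larger values encode the rate as a negative power of ten.

```python
import math


def solve_config_lr_sketch(config_lr: float) -> tuple[float, float]:
    """Hypothetical: lr < 1.0 is literal, lr >= 1.0 is an exponent."""
    if config_lr < 1.0:
        lr = config_lr                   # e.g. 0.0003 stays 0.0003
        exp_lr = -math.log10(config_lr)  # its exponent form, ~3.52
    else:
        exp_lr = config_lr               # e.g. 3.0 means 1e-3
        lr = 10.0 ** -config_lr
    return lr, exp_lr
```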