diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py index b353bb38bbd..262784eb7a1 100644 --- a/examples/models/llama/export_llama_lib.py +++ b/examples/models/llama/export_llama_lib.py @@ -123,7 +123,7 @@ "qwen2_5_1_5b": "Qwen/Qwen2.5-1.5B", "qwen2_5_coder_32b": "Qwen/Qwen2.5-Coder-32B-Instruct", "phi_4_mini": "microsoft/Phi-4-mini-instruct", - "smollm2": "HuggingFaceTB/SmolLM-135M", + "smollm2": "HuggingFaceTB/SmolLM2-135M", "qwen3_0_6b": "Qwen/Qwen3-0.6B", "qwen3_1_7b": "Qwen/Qwen3-1.7B", "qwen3_4b": "Qwen/Qwen3-4B", diff --git a/examples/models/smollm2/135M_config.json b/examples/models/smollm2/135M_config.json index 604c7e94ab5..1e3bc8ee0cb 100644 --- a/examples/models/smollm2/135M_config.json +++ b/examples/models/smollm2/135M_config.json @@ -6,7 +6,7 @@ "n_kv_heads": 3, "n_layers": 30, "norm_eps": 1e-05, - "rope_theta": 10000.0, + "rope_theta": 100000.0, "use_scaled_rope": false, "vocab_size": 49152, "use_hf_rope": false,