From 7a7ae5c4b7e611d3c82fe3e6930bebf2466d5180 Mon Sep 17 00:00:00 2001 From: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> Date: Thu, 7 Mar 2024 09:48:53 -0800 Subject: [PATCH 01/15] Set up fixtures and data for tests Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> --- tests/__init__.py | 0 tests/data/__init__.py | 8 ++++++++ tests/data/twitter_complaints_small.json | 10 ++++++++++ tests/fixtures/__init__.py | 9 +++++++++ 4 files changed, 27 insertions(+) create mode 100644 tests/__init__.py create mode 100644 tests/data/__init__.py create mode 100644 tests/data/twitter_complaints_small.json create mode 100644 tests/fixtures/__init__.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/data/__init__.py b/tests/data/__init__.py new file mode 100644 index 0000000000..d07e787c65 --- /dev/null +++ b/tests/data/__init__.py @@ -0,0 +1,8 @@ +"""Helpful datasets for configuring individual unit tests. +""" +# Standard +import os + +### Constants used for data +DATA_DIR = os.path.join(os.path.dirname(__file__)) +TWITTER_COMPLAINTS_DATA = os.path.join(DATA_DIR, "twitter_complaints_small.json") diff --git a/tests/data/twitter_complaints_small.json b/tests/data/twitter_complaints_small.json new file mode 100644 index 0000000000..eb203d10dc --- /dev/null +++ b/tests/data/twitter_complaints_small.json @@ -0,0 +1,10 @@ +{"Tweet text":"@HMRCcustomers No this is my first job","ID":0,"Label":2,"text_label":"no complaint","output":"### Text: @HMRCcustomers No this is my first job\n\n### Label: no complaint"} +{"Tweet text":"@KristaMariePark Thank you for your interest! If you decide to cancel, you can call Customer Care at 1-800-NYTIMES.","ID":1,"Label":2,"text_label":"no complaint","output":"### Text: @KristaMariePark Thank you for your interest! If you decide to cancel, you can call Customer Care at 1-800-NYTIMES.\n\n### Label: no complaint"} +{"Tweet text":"If I can't get my 3rd pair of @beatsbydre powerbeats to work today I'm doneski man. This is a slap in my balls. Your next @Bose @BoseService","ID":2,"Label":1,"text_label":"complaint","output":"### Text: If I can't get my 3rd pair of @beatsbydre powerbeats to work today I'm doneski man. This is a slap in my balls. Your next @Bose @BoseService\n\n### Label: complaint"} +{"Tweet text":"@EE On Rosneath Arial having good upload and download speeds but terrible latency 200ms. Why is this.","ID":3,"Label":1,"text_label":"complaint","output":"### Text: @EE On Rosneath Arial having good upload and download speeds but terrible latency 200ms. Why is this.\n\n### Label: complaint"} +{"Tweet text":"Couples wallpaper, so cute. :) #BrothersAtHome","ID":4,"Label":2,"text_label":"no complaint","output":"### Text: Couples wallpaper, so cute. :) #BrothersAtHome\n\n### Label: no complaint"} +{"Tweet text":"@mckelldogs This might just be me, but-- eyedrops? Artificial tears are so useful when you're sleep-deprived and sp\u2026 https:\/\/t.co\/WRtNsokblG","ID":5,"Label":2,"text_label":"no complaint","output":"### Text: @mckelldogs This might just be me, but-- eyedrops? Artificial tears are so useful when you're sleep-deprived and sp\u2026 https:\/\/t.co\/WRtNsokblG\n\n### Label: no complaint"} +{"Tweet text":"@Yelp can we get the exact calculations for a business rating (for example if its 4 stars but actually 4.2) or do we use a 3rd party site?","ID":6,"Label":2,"text_label":"no complaint","output":"### Text: @Yelp can we get the exact calculations for a business rating (for example if its 4 stars but actually 4.2) or do we use a 3rd party site?\n\n### Label: no complaint"} +{"Tweet text":"@nationalgridus I have no water and the bill is current and paid. Can you do something about this?","ID":7,"Label":1,"text_label":"complaint","output":"### Text: @nationalgridus I have no water and the bill is current and paid. Can you do something about this?\n\n### Label: complaint"} +{"Tweet text":"Never shopping at @MACcosmetics again. Every time I go in there, their employees are super rude\/condescending. I'll take my $$ to @Sephora","ID":8,"Label":1,"text_label":"complaint","output":"### Text: Never shopping at @MACcosmetics again. Every time I go in there, their employees are super rude\/condescending. I'll take my $$ to @Sephora\n\n### Label: complaint"} +{"Tweet text":"@JenniferTilly Merry Christmas to as well. You get more stunning every year \ufffd\ufffd","ID":9,"Label":2,"text_label":"no complaint","output":"### Text: @JenniferTilly Merry Christmas to as well. You get more stunning every year \ufffd\ufffd\n\n### Label: no complaint"} diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py new file mode 100644 index 0000000000..d10579b732 --- /dev/null +++ b/tests/fixtures/__init__.py @@ -0,0 +1,9 @@ +"""Helpful fixtures for configuring individual unit tests. +""" +# Standard +import os + +### Constants used in fixtures +FIXTURES_DIR = os.path.join(os.path.dirname(__file__)) +TINY_MODELS_DIR = os.path.join(FIXTURES_DIR, "tiny_models") +CAUSAL_LM_MODEL = os.path.join(TINY_MODELS_DIR, "LlamaForCausalLM") From 09fb92005a262929c01dd4a4eed4f66aebcb0609 Mon Sep 17 00:00:00 2001 From: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> Date: Thu, 7 Mar 2024 09:49:23 -0800 Subject: [PATCH 02/15] Add basic unit tests Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> --- tests/helpers.py | 19 ++++++++++ tests/test_sft_trainer.py | 76 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 tests/helpers.py create mode 100644 tests/test_sft_trainer.py diff --git a/tests/helpers.py b/tests/helpers.py new file mode 100644 index 0000000000..14e6a6b44a --- /dev/null +++ b/tests/helpers.py @@ -0,0 +1,19 @@ +# Third Party +import transformers + +# Local +from tuning.config import configs, peft_config + +def causal_lm_train_kwargs(train_kwargs): + """Parse the kwargs for a valid train call to a Causal LM.""" + parser = transformers.HfArgumentParser( + dataclass_types=( + configs.ModelArguments, + configs.DataArguments, + configs.TrainingArguments, + peft_config.LoraConfig, + peft_config.PromptTuningConfig, + ) + ) + model_args, data_args, training_args, lora_config, prompt_tuning_config = parser.parse_dict(train_kwargs, allow_extra_keys=True) + return model_args, data_args, training_args, lora_config if train_kwargs.get("peft_method")=="lora" else prompt_tuning_config diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py new file mode 100644 index 0000000000..a8fb65d215 --- /dev/null +++ b/tests/test_sft_trainer.py @@ -0,0 +1,76 @@ +# Copyright The IBM Tuning Team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unit Tests for SFT Trainer. +""" + +# Standard +import os +import tempfile + +# Local +from tuning import sft_trainer +from tests.helpers import causal_lm_train_kwargs +from tests.fixtures import CAUSAL_LM_MODEL +from tests.data import TWITTER_COMPLAINTS_DATA + + +HAPPY_PATH_KWARGS = {"model_name_or_path": CAUSAL_LM_MODEL, + "data_path": TWITTER_COMPLAINTS_DATA, + "num_train_epochs": 5, + "per_device_train_batch_size": 4, + "per_device_eval_batch_size": 4, + "gradient_accumulation_steps": 4, + "learning_rate": 0.00001, + "weight_decay": 0, + "warmup_ratio": 0.03, + "lr_scheduler_type": "cosine", + "logging_steps": 1, + "include_tokens_per_second": True, + "packing": False, + "response_template": "\\n###Response:", + "dataset_text_field": "output", + "use_flash_attn": False, + "torch_dtype": "float16", + "modelMaxLength": 4096, + "peft_method": "pt", + "prompt_tuning_init": "RANDOM", + "num_virtual_tokens": 8, + "prompt_tuning_init_text": "hello", + "tokenizer_name_or_path": CAUSAL_LM_MODEL, + "save_strategy":"epoch"} + +def test_run_causallm_pt(): + """Check if we can bootstrap and run causallm models""" + with tempfile.TemporaryDirectory() as tempdir: + HAPPY_PATH_KWARGS["output_dir"] = tempdir + model_args, data_args, training_args, tune_config = causal_lm_train_kwargs(HAPPY_PATH_KWARGS) + sft_trainer.train(model_args, data_args, training_args, tune_config) + _validate_training(tempdir) + + +def test_run_causallm_lora(): + """Check if we can bootstrap and run causallm models""" + with tempfile.TemporaryDirectory() as tempdir: + HAPPY_PATH_KWARGS["output_dir"] = tempdir + HAPPY_PATH_KWARGS["peft_method"] = "lora" + model_args, data_args, training_args, tune_config = causal_lm_train_kwargs(HAPPY_PATH_KWARGS) + sft_trainer.train(model_args, data_args, training_args, tune_config) + _validate_training(tempdir) + + +def _validate_training(tempdir): + assert any(x.startswith('checkpoint-') for x in os.listdir(tempdir)) + loss_file_path = "{}/train_loss.jsonl".format(tempdir) + assert os.path.exists(loss_file_path) == True + assert os.path.getsize(loss_file_path) > 0 \ No newline at end of file From c483ed2ca8057ab6078ac9159548dc8706604c80 Mon Sep 17 00:00:00 2001 From: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> Date: Thu, 7 Mar 2024 09:51:26 -0800 Subject: [PATCH 03/15] Setting upper bound for transformers Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0c72043959..ba366414b8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ numpy accelerate>=0.20.3 packaging -transformers>=4.34.1 +transformers>=4.34.1,<4.38.0 torch aim==3.18.1 sentencepiece From 27f12dcd1f7ce7b69f17923178c8cbc17f556b7b Mon Sep 17 00:00:00 2001 From: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> Date: Thu, 7 Mar 2024 09:51:53 -0800 Subject: [PATCH 04/15] Ignore aim log files Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 178b160fc7..2bb9bd78d9 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,6 @@ venv/ # Tox envs .tox + +# Aim +.aim From da7f8cbc7e876f52421b3aa44802b3794baca983 Mon Sep 17 00:00:00 2001 From: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> Date: Thu, 7 Mar 2024 09:52:17 -0800 Subject: [PATCH 05/15] Include int num_train_epochs Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> --- tuning/sft_trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py index 59943a7509..d9b7024283 100644 --- a/tuning/sft_trainer.py +++ b/tuning/sft_trainer.py @@ -123,7 +123,7 @@ def train( logger = logging.get_logger("sft_trainer") # Validate parameters - if (not isinstance(train_args.num_train_epochs, float)) or ( + if (not isinstance(train_args.num_train_epochs, (float, int))) or ( train_args.num_train_epochs <= 0 ): raise ValueError("num_train_epochs has to be an integer/float >= 1") From 45a6a053973218802b1f6ac4f734357ddb7207af Mon Sep 17 00:00:00 2001 From: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> Date: Thu, 7 Mar 2024 09:52:29 -0800 Subject: [PATCH 06/15] Fix formatting Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> --- tests/helpers.py | 30 ++++++++++++++++++++++-------- tests/test_sft_trainer.py | 34 +++++++++++++++++++++------------- 2 files changed, 43 insertions(+), 21 deletions(-) diff --git a/tests/helpers.py b/tests/helpers.py index 14e6a6b44a..b2835e0e0e 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -4,16 +4,30 @@ # Local from tuning.config import configs, peft_config + def causal_lm_train_kwargs(train_kwargs): """Parse the kwargs for a valid train call to a Causal LM.""" parser = transformers.HfArgumentParser( - dataclass_types=( - configs.ModelArguments, - configs.DataArguments, - configs.TrainingArguments, - peft_config.LoraConfig, - peft_config.PromptTuningConfig, + dataclass_types=( + configs.ModelArguments, + configs.DataArguments, + configs.TrainingArguments, + peft_config.LoraConfig, + peft_config.PromptTuningConfig, ) ) - model_args, data_args, training_args, lora_config, prompt_tuning_config = parser.parse_dict(train_kwargs, allow_extra_keys=True) - return model_args, data_args, training_args, lora_config if train_kwargs.get("peft_method")=="lora" else prompt_tuning_config + ( + model_args, + data_args, + training_args, + lora_config, + prompt_tuning_config, + ) = parser.parse_dict(train_kwargs, allow_extra_keys=True) + return ( + model_args, + data_args, + training_args, + lora_config + if train_kwargs.get("peft_method") == "lora" + else prompt_tuning_config, + ) diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index a8fb65d215..b30a6821a0 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -12,26 +12,28 @@ # See the License for the specific language governing permissions and # limitations under the License. """Unit Tests for SFT Trainer. -""" +""" # Standard import os import tempfile -# Local -from tuning import sft_trainer -from tests.helpers import causal_lm_train_kwargs -from tests.fixtures import CAUSAL_LM_MODEL +# First Party from tests.data import TWITTER_COMPLAINTS_DATA +from tests.fixtures import CAUSAL_LM_MODEL +from tests.helpers import causal_lm_train_kwargs +# Local +from tuning import sft_trainer -HAPPY_PATH_KWARGS = {"model_name_or_path": CAUSAL_LM_MODEL, +HAPPY_PATH_KWARGS = { + "model_name_or_path": CAUSAL_LM_MODEL, "data_path": TWITTER_COMPLAINTS_DATA, "num_train_epochs": 5, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "gradient_accumulation_steps": 4, - "learning_rate": 0.00001, + "learning_rate": 0.00001, "weight_decay": 0, "warmup_ratio": 0.03, "lr_scheduler_type": "cosine", @@ -48,29 +50,35 @@ "num_virtual_tokens": 8, "prompt_tuning_init_text": "hello", "tokenizer_name_or_path": CAUSAL_LM_MODEL, - "save_strategy":"epoch"} + "save_strategy": "epoch", +} + def test_run_causallm_pt(): """Check if we can bootstrap and run causallm models""" with tempfile.TemporaryDirectory() as tempdir: HAPPY_PATH_KWARGS["output_dir"] = tempdir - model_args, data_args, training_args, tune_config = causal_lm_train_kwargs(HAPPY_PATH_KWARGS) + model_args, data_args, training_args, tune_config = causal_lm_train_kwargs( + HAPPY_PATH_KWARGS + ) sft_trainer.train(model_args, data_args, training_args, tune_config) _validate_training(tempdir) - + def test_run_causallm_lora(): """Check if we can bootstrap and run causallm models""" with tempfile.TemporaryDirectory() as tempdir: HAPPY_PATH_KWARGS["output_dir"] = tempdir HAPPY_PATH_KWARGS["peft_method"] = "lora" - model_args, data_args, training_args, tune_config = causal_lm_train_kwargs(HAPPY_PATH_KWARGS) + model_args, data_args, training_args, tune_config = causal_lm_train_kwargs( + HAPPY_PATH_KWARGS + ) sft_trainer.train(model_args, data_args, training_args, tune_config) _validate_training(tempdir) def _validate_training(tempdir): - assert any(x.startswith('checkpoint-') for x in os.listdir(tempdir)) + assert any(x.startswith("checkpoint-") for x in os.listdir(tempdir)) loss_file_path = "{}/train_loss.jsonl".format(tempdir) assert os.path.exists(loss_file_path) == True - assert os.path.getsize(loss_file_path) > 0 \ No newline at end of file + assert os.path.getsize(loss_file_path) > 0 From 1158054f74f2bb2acacce9fbec3a28f64728b815 Mon Sep 17 00:00:00 2001 From: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> Date: Thu, 7 Mar 2024 09:52:40 -0800 Subject: [PATCH 07/15] Add copyright notice Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> --- tests/__init__.py | 13 +++++++++++++ tests/data/__init__.py | 14 ++++++++++++++ tests/fixtures/__init__.py | 14 ++++++++++++++ tests/helpers.py | 14 ++++++++++++++ tests/test_sft_trainer.py | 1 + 5 files changed, 56 insertions(+) diff --git a/tests/__init__.py b/tests/__init__.py index e69de29bb2..a211ad5c2f 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright The IBM Tuning Team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/data/__init__.py b/tests/data/__init__.py index d07e787c65..18a35c3437 100644 --- a/tests/data/__init__.py +++ b/tests/data/__init__.py @@ -1,3 +1,17 @@ +# Copyright The IBM Tuning Team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Helpful datasets for configuring individual unit tests. """ # Standard diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index d10579b732..9c59263df9 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -1,3 +1,17 @@ +# Copyright The IBM Tuning Team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Helpful fixtures for configuring individual unit tests. """ # Standard diff --git a/tests/helpers.py b/tests/helpers.py index b2835e0e0e..24d7a2b8d0 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -1,3 +1,17 @@ +# Copyright The IBM Tuning Team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Third Party import transformers diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index b30a6821a0..dc28de565f 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """Unit Tests for SFT Trainer. """ From e7dfb0604fbd424ea0705d792628433584da27b1 Mon Sep 17 00:00:00 2001 From: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> Date: Thu, 7 Mar 2024 09:52:55 -0800 Subject: [PATCH 08/15] Address review comments Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> --- tests/test_sft_trainer.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index dc28de565f..41117f220e 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -18,6 +18,7 @@ # Standard import os import tempfile +import json # First Party from tests.data import TWITTER_COMPLAINTS_DATA @@ -41,11 +42,11 @@ "logging_steps": 1, "include_tokens_per_second": True, "packing": False, - "response_template": "\\n###Response:", + "response_template": "\n### Label:", "dataset_text_field": "output", "use_flash_attn": False, - "torch_dtype": "float16", - "modelMaxLength": 4096, + "torch_dtype": "float32", + "model_max_length": 4096, "peft_method": "pt", "prompt_tuning_init": "RANDOM", "num_virtual_tokens": 8, @@ -63,7 +64,7 @@ def test_run_causallm_pt(): HAPPY_PATH_KWARGS ) sft_trainer.train(model_args, data_args, training_args, tune_config) - _validate_training(tempdir) + _validate_training(tempdir, "PROMPT_TUNING") def test_run_causallm_lora(): @@ -75,11 +76,16 @@ def test_run_causallm_lora(): HAPPY_PATH_KWARGS ) sft_trainer.train(model_args, data_args, training_args, tune_config) - _validate_training(tempdir) + _validate_training(tempdir, "LORA") -def _validate_training(tempdir): +def _validate_training(tempdir, peft_type): assert any(x.startswith("checkpoint-") for x in os.listdir(tempdir)) - loss_file_path = "{}/train_loss.jsonl".format(tempdir) - assert os.path.exists(loss_file_path) == True - assert os.path.getsize(loss_file_path) > 0 + train_loss_file_path = "{}/train_loss.jsonl".format(tempdir) + assert os.path.exists(train_loss_file_path) == True + assert os.path.getsize(train_loss_file_path) > 0 + adapter_config_path = os.path.join(tempdir, "checkpoint-1", "adapter_config.json") + assert os.path.exists(adapter_config_path) + with open(adapter_config_path) as f: + data = json.load(f) + assert data.get("peft_type") == peft_type From 1d9b4ef940c9067ba31aefc56a93cdd74501855d Mon Sep 17 00:00:00 2001 From: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> Date: Thu, 7 Mar 2024 09:53:08 -0800 Subject: [PATCH 09/15] Run inference on tuned model Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> --- tests/test_sft_trainer.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index 41117f220e..1ff1836c7d 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -16,11 +16,12 @@ """ # Standard +import json import os import tempfile -import json # First Party +from scripts.run_inference import TunedCausalLM from tests.data import TWITTER_COMPLAINTS_DATA from tests.fixtures import CAUSAL_LM_MODEL from tests.helpers import causal_lm_train_kwargs @@ -66,6 +67,17 @@ def test_run_causallm_pt(): sft_trainer.train(model_args, data_args, training_args, tune_config) _validate_training(tempdir, "PROMPT_TUNING") + # Load the tuned model + loaded_model = TunedCausalLM.load( + checkpoint_path=os.path.join(tempdir, "checkpoint-5"), + base_model_name_or_path=CAUSAL_LM_MODEL, + ) + + # Run inference on the text using the tuned model + loaded_model.run( + "Simply put, the theory of relativity states that ", max_new_tokens=500 + ) + def test_run_causallm_lora(): """Check if we can bootstrap and run causallm models""" @@ -78,6 +90,17 @@ def test_run_causallm_lora(): sft_trainer.train(model_args, data_args, training_args, tune_config) _validate_training(tempdir, "LORA") + # Load the tuned model + loaded_model = TunedCausalLM.load( + checkpoint_path=os.path.join(tempdir, "checkpoint-5"), + base_model_name_or_path=CAUSAL_LM_MODEL, + ) + + # Run inference on the text using the tuned model + loaded_model.run( + "Simply put, the theory of relativity states that ", max_new_tokens=500 + ) + def _validate_training(tempdir, peft_type): assert any(x.startswith("checkpoint-") for x in os.listdir(tempdir)) From 6d912b60d04ccfac2d0609fcf5e75c677590a89f Mon Sep 17 00:00:00 2001 From: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> Date: Thu, 7 Mar 2024 10:36:08 -0800 Subject: [PATCH 10/15] Trainer downloads model Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> --- tests/fixtures/__init__.py | 23 ----------------------- tests/test_sft_trainer.py | 7 ++----- 2 files changed, 2 insertions(+), 28 deletions(-) delete mode 100644 tests/fixtures/__init__.py diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py deleted file mode 100644 index 9c59263df9..0000000000 --- a/tests/fixtures/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright The IBM Tuning Team -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Helpful fixtures for configuring individual unit tests. -""" -# Standard -import os - -### Constants used in fixtures -FIXTURES_DIR = os.path.join(os.path.dirname(__file__)) -TINY_MODELS_DIR = os.path.join(FIXTURES_DIR, "tiny_models") -CAUSAL_LM_MODEL = os.path.join(TINY_MODELS_DIR, "LlamaForCausalLM") diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index 1ff1836c7d..1e98533b3c 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -23,14 +23,13 @@ # First Party from scripts.run_inference import TunedCausalLM from tests.data import TWITTER_COMPLAINTS_DATA -from tests.fixtures import CAUSAL_LM_MODEL from tests.helpers import causal_lm_train_kwargs # Local from tuning import sft_trainer HAPPY_PATH_KWARGS = { - "model_name_or_path": CAUSAL_LM_MODEL, + "model_name_or_path": "Maykeye/TinyLLama-v0", "data_path": TWITTER_COMPLAINTS_DATA, "num_train_epochs": 5, "per_device_train_batch_size": 4, @@ -52,7 +51,7 @@ "prompt_tuning_init": "RANDOM", "num_virtual_tokens": 8, "prompt_tuning_init_text": "hello", - "tokenizer_name_or_path": CAUSAL_LM_MODEL, + "tokenizer_name_or_path": "Maykeye/TinyLLama-v0", "save_strategy": "epoch", } @@ -70,7 +69,6 @@ def test_run_causallm_pt(): # Load the tuned model loaded_model = TunedCausalLM.load( checkpoint_path=os.path.join(tempdir, "checkpoint-5"), - base_model_name_or_path=CAUSAL_LM_MODEL, ) # Run inference on the text using the tuned model @@ -93,7 +91,6 @@ def test_run_causallm_lora(): # Load the tuned model loaded_model = TunedCausalLM.load( checkpoint_path=os.path.join(tempdir, "checkpoint-5"), - base_model_name_or_path=CAUSAL_LM_MODEL, ) # Run inference on the text using the tuned model From 6c8740467bbb4ce88ebfc56b2dffb82473006c83 Mon Sep 17 00:00:00 2001 From: Anh-Uong Date: Thu, 7 Mar 2024 12:11:42 -0700 Subject: [PATCH 11/15] add more unit tests and refactor Signed-off-by: Anh-Uong --- tests/test_sft_trainer.py | 187 ++++++++++++++++++++++++++++++-------- 1 file changed, 150 insertions(+), 37 deletions(-) diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index 1e98533b3c..b1d78d8451 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -16,9 +16,11 @@ """ # Standard -import json import os import tempfile +import pytest +import copy +import json # First Party from scripts.run_inference import TunedCausalLM @@ -28,8 +30,9 @@ # Local from tuning import sft_trainer -HAPPY_PATH_KWARGS = { - "model_name_or_path": "Maykeye/TinyLLama-v0", +MODEL_NAME = "Maykeye/TinyLLama-v0" +BASE_PEFT_KWARGS = { + "model_name_or_path": MODEL_NAME, "data_path": TWITTER_COMPLAINTS_DATA, "num_train_epochs": 5, "per_device_train_batch_size": 4, @@ -51,61 +54,171 @@ "prompt_tuning_init": "RANDOM", "num_virtual_tokens": 8, "prompt_tuning_init_text": "hello", - "tokenizer_name_or_path": "Maykeye/TinyLLama-v0", + "tokenizer_name_or_path": MODEL_NAME, "save_strategy": "epoch", + "output_dir": "tmp", } +BASE_LORA_KWARGS = copy.deepcopy(BASE_PEFT_KWARGS) +BASE_LORA_KWARGS["peft_method"] = "lora" + +def test_helper_causal_lm_train_kwargs(): + """Check happy path kwargs passed and parsed properly.""" + model_args, data_args, training_args, tune_config = causal_lm_train_kwargs( + BASE_PEFT_KWARGS + ) + + assert model_args.model_name_or_path == MODEL_NAME + assert model_args.use_flash_attn == False + assert model_args.torch_dtype == "float32" + + assert data_args.data_path == TWITTER_COMPLAINTS_DATA + assert data_args.response_template == "\n### Label:" + assert data_args.dataset_text_field == "output" + + assert training_args.num_train_epochs == 5 + assert training_args.model_max_length == 4096 + assert training_args.save_strategy == "epoch" + + assert tune_config.prompt_tuning_init == "RANDOM" + assert tune_config.prompt_tuning_init_text == "hello" + assert tune_config.tokenizer_name_or_path == MODEL_NAME + assert tune_config.num_virtual_tokens == 8 + +def test_run_train_requires_output_dir(): + """Check fails when output dir not provided.""" + updated_output_dir = copy.deepcopy(BASE_PEFT_KWARGS) + updated_output_dir["output_dir"] = None + model_args, data_args, training_args, tune_config = causal_lm_train_kwargs( + updated_output_dir + ) + with pytest.raises(TypeError): + sft_trainer.train(model_args, data_args, training_args, tune_config) + +def test_run_train_fails_data_path_not_exist(): + """Check fails when data path not found.""" + updated_output_path = copy.deepcopy(BASE_PEFT_KWARGS) + updated_output_path["data_path"] = "fake/path" + model_args, data_args, training_args, tune_config = causal_lm_train_kwargs( + updated_output_path + ) + with pytest.raises(FileNotFoundError): + sft_trainer.train(model_args, data_args, training_args, tune_config) -def test_run_causallm_pt(): - """Check if we can bootstrap and run causallm models""" +def test_run_causallm_pt_and_inference(): + """Check if we can bootstrap and peft tune causallm models""" with tempfile.TemporaryDirectory() as tempdir: - HAPPY_PATH_KWARGS["output_dir"] = tempdir + BASE_PEFT_KWARGS["output_dir"] = tempdir model_args, data_args, training_args, tune_config = causal_lm_train_kwargs( - HAPPY_PATH_KWARGS + BASE_PEFT_KWARGS ) sft_trainer.train(model_args, data_args, training_args, tune_config) - _validate_training(tempdir, "PROMPT_TUNING") + _validate_training(tempdir) - # Load the tuned model - loaded_model = TunedCausalLM.load( - checkpoint_path=os.path.join(tempdir, "checkpoint-5"), - ) + # validate peft tuning configs + checkpoint_path = os.path.join(tempdir, "checkpoint-5") + adapter_config = _get_adapter_config(checkpoint_path) + assert adapter_config.get("task_type") == "CAUSAL_LM" + assert adapter_config.get("peft_type") == "PROMPT_TUNING" + assert adapter_config.get("tokenizer_name_or_path") == BASE_PEFT_KWARGS["tokenizer_name_or_path"] - # Run inference on the text using the tuned model - loaded_model.run( - "Simply put, the theory of relativity states that ", max_new_tokens=500 + # Load the model + loaded_model = TunedCausalLM.load(checkpoint_path) + + # Run inference on the text + output_inference = loaded_model.run("### Text: @NortonSupport Thanks much.\n\n### Label:", max_new_tokens=50) + assert len(output_inference) > 0 + assert "### Text: @NortonSupport Thanks much.\n\n### Label:" in output_inference + +def test_run_causallm_pt_with_validation(): + """Check if we can bootstrap and peft tune causallm models with validation dataset""" + with tempfile.TemporaryDirectory() as tempdir: + validation_peft = copy.deepcopy(BASE_PEFT_KWARGS) + validation_peft["output_dir"] = tempdir + validation_peft["validation_data_path"] = TWITTER_COMPLAINTS_DATA + validation_peft["evaluation_strategy"] = "epoch" + model_args, data_args, training_args, tune_config = causal_lm_train_kwargs( + validation_peft ) + assert data_args.validation_data_path == TWITTER_COMPLAINTS_DATA -def test_run_causallm_lora(): - """Check if we can bootstrap and run causallm models""" + sft_trainer.train(model_args, data_args, training_args, tune_config) + _validate_training(tempdir) + + eval_loss_file_path = os.path.join(tempdir, "eval_loss.jsonl") + assert os.path.exists(eval_loss_file_path) + assert os.path.getsize(eval_loss_file_path) > 0 + +def test_run_causallm_lora_and_inference(): + """Check if we can bootstrap and lora tune causallm models""" with tempfile.TemporaryDirectory() as tempdir: - HAPPY_PATH_KWARGS["output_dir"] = tempdir - HAPPY_PATH_KWARGS["peft_method"] = "lora" + BASE_LORA_KWARGS["output_dir"] = tempdir model_args, data_args, training_args, tune_config = causal_lm_train_kwargs( - HAPPY_PATH_KWARGS + BASE_LORA_KWARGS ) sft_trainer.train(model_args, data_args, training_args, tune_config) - _validate_training(tempdir, "LORA") + _validate_training(tempdir) + + # validate peft tuning configs + checkpoint_path = os.path.join(tempdir, "checkpoint-5") + adapter_config = _get_adapter_config(checkpoint_path) + assert adapter_config.get("task_type") == "CAUSAL_LM" + assert adapter_config.get("peft_type") == "LORA" + for module in ["q_proj", "v_proj"]: # default target_modules used + assert module in adapter_config.get("target_modules") + + # Load the model + loaded_model = TunedCausalLM.load(checkpoint_path) + + # Run inference on the text + output_inference = loaded_model.run("Simply put, the theory of relativity states that ", max_new_tokens=50) + assert len(output_inference) > 0 + assert "Simply put, the theory of relativity states that" in output_inference + +def test_run_train_lora_target_modules(): + """Check runs lora tuning with given list of target modules.""" + with tempfile.TemporaryDirectory() as tempdir: + lora_target_modules = copy.deepcopy(BASE_LORA_KWARGS) + lora_target_modules["output_dir"] = tempdir + lora_target_modules["target_modules"] = ["q_proj","k_proj","v_proj","o_proj"] - # Load the tuned model - loaded_model = TunedCausalLM.load( - checkpoint_path=os.path.join(tempdir, "checkpoint-5"), + model_args, data_args, training_args, tune_config = causal_lm_train_kwargs( + lora_target_modules ) + sft_trainer.train(model_args, data_args, training_args, tune_config) + _validate_training(tempdir) + + checkpoint_path = os.path.join(tempdir, "checkpoint-5") + adapter_config = _get_adapter_config(checkpoint_path) + for module in lora_target_modules["target_modules"]: + assert module in adapter_config.get("target_modules") + +def test_run_train_lora_target_modules_all_linear(): + """Check runs lora tuning with all linear target modules.""" + with tempfile.TemporaryDirectory() as tempdir: + lora_target_modules = copy.deepcopy(BASE_LORA_KWARGS) + lora_target_modules["output_dir"] = tempdir + lora_target_modules["target_modules"] = ["all-linear"] - # Run inference on the text using the tuned model - loaded_model.run( - "Simply put, the theory of relativity states that ", max_new_tokens=500 + model_args, data_args, training_args, tune_config = causal_lm_train_kwargs( + lora_target_modules ) + sft_trainer.train(model_args, data_args, training_args, tune_config) + _validate_training(tempdir) + checkpoint_path = os.path.join(tempdir, "checkpoint-5") + adapter_config = _get_adapter_config(checkpoint_path) + llama_expected_modules = ["o_proj", "q_proj", "gate_proj", "down_proj", "k_proj", "up_proj", "v_proj"] + for module in llama_expected_modules: + assert module in adapter_config.get("target_modules") -def _validate_training(tempdir, peft_type): +def _validate_training(tempdir): assert any(x.startswith("checkpoint-") for x in os.listdir(tempdir)) - train_loss_file_path = "{}/train_loss.jsonl".format(tempdir) - assert os.path.exists(train_loss_file_path) == True - assert os.path.getsize(train_loss_file_path) > 0 - adapter_config_path = os.path.join(tempdir, "checkpoint-1", "adapter_config.json") - assert os.path.exists(adapter_config_path) - with open(adapter_config_path) as f: - data = json.load(f) - assert data.get("peft_type") == peft_type + loss_file_path = "{}/train_loss.jsonl".format(tempdir) + assert os.path.exists(loss_file_path) + assert os.path.getsize(loss_file_path) > 0 + +def _get_adapter_config(dir_path): + with open(os.path.join(dir_path, "adapter_config.json")) as f: + return json.load(f) \ No newline at end of file From f288cd43f3c137dc89277aa985c756a0a5aeb0aa Mon Sep 17 00:00:00 2001 From: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> Date: Mon, 11 Mar 2024 11:51:13 -0700 Subject: [PATCH 12/15] Fix formatting Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> --- tests/test_sft_trainer.py | 45 +++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index b1d78d8451..d04a465e17 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -16,11 +16,13 @@ """ # Standard +import copy +import json import os import tempfile + +# Third Party import pytest -import copy -import json # First Party from scripts.run_inference import TunedCausalLM @@ -62,6 +64,7 @@ BASE_LORA_KWARGS = copy.deepcopy(BASE_PEFT_KWARGS) BASE_LORA_KWARGS["peft_method"] = "lora" + def test_helper_causal_lm_train_kwargs(): """Check happy path kwargs passed and parsed properly.""" model_args, data_args, training_args, tune_config = causal_lm_train_kwargs( @@ -85,6 +88,7 @@ def test_helper_causal_lm_train_kwargs(): assert tune_config.tokenizer_name_or_path == MODEL_NAME assert tune_config.num_virtual_tokens == 8 + def test_run_train_requires_output_dir(): """Check fails when output dir not provided.""" updated_output_dir = copy.deepcopy(BASE_PEFT_KWARGS) @@ -95,6 +99,7 @@ def test_run_train_requires_output_dir(): with pytest.raises(TypeError): sft_trainer.train(model_args, data_args, training_args, tune_config) + def test_run_train_fails_data_path_not_exist(): """Check fails when data path not found.""" updated_output_path = copy.deepcopy(BASE_PEFT_KWARGS) @@ -105,6 +110,7 @@ def test_run_train_fails_data_path_not_exist(): with pytest.raises(FileNotFoundError): sft_trainer.train(model_args, data_args, training_args, tune_config) + def test_run_causallm_pt_and_inference(): """Check if we can bootstrap and peft tune causallm models""" with tempfile.TemporaryDirectory() as tempdir: @@ -120,16 +126,22 @@ def test_run_causallm_pt_and_inference(): adapter_config = _get_adapter_config(checkpoint_path) assert adapter_config.get("task_type") == "CAUSAL_LM" assert adapter_config.get("peft_type") == "PROMPT_TUNING" - assert adapter_config.get("tokenizer_name_or_path") == BASE_PEFT_KWARGS["tokenizer_name_or_path"] + assert ( + adapter_config.get("tokenizer_name_or_path") + == BASE_PEFT_KWARGS["tokenizer_name_or_path"] + ) # Load the model loaded_model = TunedCausalLM.load(checkpoint_path) # Run inference on the text - output_inference = loaded_model.run("### Text: @NortonSupport Thanks much.\n\n### Label:", max_new_tokens=50) + output_inference = loaded_model.run( + "### Text: @NortonSupport Thanks much.\n\n### Label:", max_new_tokens=50 + ) assert len(output_inference) > 0 assert "### Text: @NortonSupport Thanks much.\n\n### Label:" in output_inference + def test_run_causallm_pt_with_validation(): """Check if we can bootstrap and peft tune causallm models with validation dataset""" with tempfile.TemporaryDirectory() as tempdir: @@ -150,6 +162,7 @@ def test_run_causallm_pt_with_validation(): assert os.path.exists(eval_loss_file_path) assert os.path.getsize(eval_loss_file_path) > 0 + def test_run_causallm_lora_and_inference(): """Check if we can bootstrap and lora tune causallm models""" with tempfile.TemporaryDirectory() as tempdir: @@ -165,23 +178,26 @@ def test_run_causallm_lora_and_inference(): adapter_config = _get_adapter_config(checkpoint_path) assert adapter_config.get("task_type") == "CAUSAL_LM" assert adapter_config.get("peft_type") == "LORA" - for module in ["q_proj", "v_proj"]: # default target_modules used + for module in ["q_proj", "v_proj"]: # default target_modules used assert module in adapter_config.get("target_modules") # Load the model loaded_model = TunedCausalLM.load(checkpoint_path) # Run inference on the text - output_inference = loaded_model.run("Simply put, the theory of relativity states that ", max_new_tokens=50) + output_inference = loaded_model.run( + "Simply put, the theory of relativity states that ", max_new_tokens=50 + ) assert len(output_inference) > 0 assert "Simply put, the theory of relativity states that" in output_inference + def test_run_train_lora_target_modules(): """Check runs lora tuning with given list of target modules.""" with tempfile.TemporaryDirectory() as tempdir: lora_target_modules = copy.deepcopy(BASE_LORA_KWARGS) lora_target_modules["output_dir"] = tempdir - lora_target_modules["target_modules"] = ["q_proj","k_proj","v_proj","o_proj"] + lora_target_modules["target_modules"] = ["q_proj", "k_proj", "v_proj", "o_proj"] model_args, data_args, training_args, tune_config = causal_lm_train_kwargs( lora_target_modules @@ -194,6 +210,7 @@ def test_run_train_lora_target_modules(): for module in lora_target_modules["target_modules"]: assert module in adapter_config.get("target_modules") + def test_run_train_lora_target_modules_all_linear(): """Check runs lora tuning with all linear target modules.""" with tempfile.TemporaryDirectory() as tempdir: @@ -209,16 +226,26 @@ def test_run_train_lora_target_modules_all_linear(): checkpoint_path = os.path.join(tempdir, "checkpoint-5") adapter_config = _get_adapter_config(checkpoint_path) - llama_expected_modules = ["o_proj", "q_proj", "gate_proj", "down_proj", "k_proj", "up_proj", "v_proj"] + llama_expected_modules = [ + "o_proj", + "q_proj", + "gate_proj", + "down_proj", + "k_proj", + "up_proj", + "v_proj", + ] for module in llama_expected_modules: assert module in adapter_config.get("target_modules") + def _validate_training(tempdir): assert any(x.startswith("checkpoint-") for x in os.listdir(tempdir)) loss_file_path = "{}/train_loss.jsonl".format(tempdir) assert os.path.exists(loss_file_path) assert os.path.getsize(loss_file_path) > 0 + def _get_adapter_config(dir_path): with open(os.path.join(dir_path, "adapter_config.json")) as f: - return json.load(f) \ No newline at end of file + return json.load(f) From f958f159c075a3656fe59268a1aba0414b3e9fce Mon Sep 17 00:00:00 2001 From: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> Date: Mon, 11 Mar 2024 13:15:03 -0700 Subject: [PATCH 13/15] Add FT unit test and refactor Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> --- tests/test_sft_trainer.py | 72 ++++++++++++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index d04a465e17..0156e1e067 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -64,6 +64,9 @@ BASE_LORA_KWARGS = copy.deepcopy(BASE_PEFT_KWARGS) BASE_LORA_KWARGS["peft_method"] = "lora" +BASE_FT_KWARGS = copy.deepcopy(BASE_PEFT_KWARGS) +BASE_FT_KWARGS["peft_method"] = "" + def test_helper_causal_lm_train_kwargs(): """Check happy path kwargs passed and parsed properly.""" @@ -119,17 +122,12 @@ def test_run_causallm_pt_and_inference(): BASE_PEFT_KWARGS ) sft_trainer.train(model_args, data_args, training_args, tune_config) - _validate_training(tempdir) # validate peft tuning configs + _validate_training(tempdir) checkpoint_path = os.path.join(tempdir, "checkpoint-5") adapter_config = _get_adapter_config(checkpoint_path) - assert adapter_config.get("task_type") == "CAUSAL_LM" - assert adapter_config.get("peft_type") == "PROMPT_TUNING" - assert ( - adapter_config.get("tokenizer_name_or_path") - == BASE_PEFT_KWARGS["tokenizer_name_or_path"] - ) + _validate_adapter_config(adapter_config, "PROMPT_TUNING", BASE_PEFT_KWARGS) # Load the model loaded_model = TunedCausalLM.load(checkpoint_path) @@ -171,13 +169,12 @@ def test_run_causallm_lora_and_inference(): BASE_LORA_KWARGS ) sft_trainer.train(model_args, data_args, training_args, tune_config) - _validate_training(tempdir) - # validate peft tuning configs + # validate lora tuning configs + _validate_training(tempdir) checkpoint_path = os.path.join(tempdir, "checkpoint-5") adapter_config = _get_adapter_config(checkpoint_path) - assert adapter_config.get("task_type") == "CAUSAL_LM" - assert adapter_config.get("peft_type") == "LORA" + _validate_adapter_config(adapter_config, "LORA", BASE_LORA_KWARGS) for module in ["q_proj", "v_proj"]: # default target_modules used assert module in adapter_config.get("target_modules") @@ -203,10 +200,12 @@ def test_run_train_lora_target_modules(): lora_target_modules ) sft_trainer.train(model_args, data_args, training_args, tune_config) - _validate_training(tempdir) + # validate lora tuning configs + _validate_training(tempdir) checkpoint_path = os.path.join(tempdir, "checkpoint-5") adapter_config = _get_adapter_config(checkpoint_path) + _validate_adapter_config(adapter_config, "LORA", BASE_LORA_KWARGS) for module in lora_target_modules["target_modules"]: assert module in adapter_config.get("target_modules") @@ -222,10 +221,12 @@ def test_run_train_lora_target_modules_all_linear(): lora_target_modules ) sft_trainer.train(model_args, data_args, training_args, tune_config) - _validate_training(tempdir) + # validate lora tuning configs + _validate_training(tempdir) checkpoint_path = os.path.join(tempdir, "checkpoint-5") adapter_config = _get_adapter_config(checkpoint_path) + _validate_adapter_config(adapter_config, "LORA", BASE_LORA_KWARGS) llama_expected_modules = [ "o_proj", "q_proj", @@ -239,13 +240,52 @@ def test_run_train_lora_target_modules_all_linear(): assert module in adapter_config.get("target_modules") +def test_run_causallm_ft_and_inference(): + """Check if we can bootstrap and finetune tune causallm models""" + with tempfile.TemporaryDirectory() as tempdir: + BASE_FT_KWARGS["output_dir"] = tempdir + model_args, data_args, training_args, tune_config = causal_lm_train_kwargs( + BASE_FT_KWARGS + ) + sft_trainer.train(model_args, data_args, training_args, tune_config) + + # validate ft tuning configs + _validate_training(tempdir) + checkpoint_path = os.path.join(tempdir, "checkpoint-5") + adapter_config = _get_adapter_config(checkpoint_path) + _validate_adapter_config(adapter_config, "PROMPT_TUNING", BASE_FT_KWARGS) + + # Load the model + loaded_model = TunedCausalLM.load(checkpoint_path) + + # Run inference on the text + output_inference = loaded_model.run( + "### Text: @NortonSupport Thanks much.\n\n### Label:", max_new_tokens=50 + ) + assert len(output_inference) > 0 + assert "### Text: @NortonSupport Thanks much.\n\n### Label:" in output_inference + + def _validate_training(tempdir): assert any(x.startswith("checkpoint-") for x in os.listdir(tempdir)) - loss_file_path = "{}/train_loss.jsonl".format(tempdir) - assert os.path.exists(loss_file_path) - assert os.path.getsize(loss_file_path) > 0 + train_loss_file_path = "{}/train_loss.jsonl".format(tempdir) + assert os.path.exists(train_loss_file_path) == True + assert os.path.getsize(train_loss_file_path) > 0 def _get_adapter_config(dir_path): with open(os.path.join(dir_path, "adapter_config.json")) as f: return json.load(f) + + +def _validate_adapter_config(adapter_config, peft_type, base_kwargs): + assert adapter_config.get("task_type") == "CAUSAL_LM" + assert adapter_config.get("peft_type") == peft_type + assert ( + ( + adapter_config.get("tokenizer_name_or_path") + == base_kwargs["tokenizer_name_or_path"] + ) + if peft_type == "PROMPT_TUNING" + else True + ) From 3d7b4df523bd9dff32721207e021db9a57a899e5 Mon Sep 17 00:00:00 2001 From: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> Date: Mon, 11 Mar 2024 13:28:00 -0700 Subject: [PATCH 14/15] Removing transformers upper bound cap Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ba366414b8..0c72043959 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ numpy accelerate>=0.20.3 packaging -transformers>=4.34.1,<4.38.0 +transformers>=4.34.1 torch aim==3.18.1 sentencepiece From dc55c7282467599721e6abce939f0ea99547073c Mon Sep 17 00:00:00 2001 From: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> Date: Mon, 11 Mar 2024 22:08:51 -0700 Subject: [PATCH 15/15] Address review comments Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com> --- tests/test_sft_trainer.py | 164 +++++++++++++++++++++++--------------- 1 file changed, 99 insertions(+), 65 deletions(-) diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index 0156e1e067..f70841a9f8 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -66,6 +66,8 @@ BASE_FT_KWARGS = copy.deepcopy(BASE_PEFT_KWARGS) BASE_FT_KWARGS["peft_method"] = "" +BASE_FT_KWARGS["prompt_tuning_init"] = "" +BASE_FT_KWARGS["prompt_tuning_init_text"] = "" def test_helper_causal_lm_train_kwargs(): @@ -114,6 +116,9 @@ def test_run_train_fails_data_path_not_exist(): sft_trainer.train(model_args, data_args, training_args, tune_config) +############################# Prompt Tuning Tests ############################# + + def test_run_causallm_pt_and_inference(): """Check if we can bootstrap and peft tune causallm models""" with tempfile.TemporaryDirectory() as tempdir: @@ -125,7 +130,7 @@ def test_run_causallm_pt_and_inference(): # validate peft tuning configs _validate_training(tempdir) - checkpoint_path = os.path.join(tempdir, "checkpoint-5") + checkpoint_path = _get_checkpoint_path(tempdir) adapter_config = _get_adapter_config(checkpoint_path) _validate_adapter_config(adapter_config, "PROMPT_TUNING", BASE_PEFT_KWARGS) @@ -140,6 +145,53 @@ def test_run_causallm_pt_and_inference(): assert "### Text: @NortonSupport Thanks much.\n\n### Label:" in output_inference +def test_run_causallm_pt_init_text(): + """Check if we can bootstrap and peft tune causallm models with init text as 'TEXT'""" + with tempfile.TemporaryDirectory() as tempdir: + pt_init_text = copy.deepcopy(BASE_PEFT_KWARGS) + pt_init_text["output_dir"] = tempdir + pt_init_text["prompt_tuning_init"] = "TEXT" + model_args, data_args, training_args, tune_config = causal_lm_train_kwargs( + pt_init_text + ) + sft_trainer.train(model_args, data_args, training_args, tune_config) + + # validate peft tuning configs + _validate_training(tempdir) + checkpoint_path = _get_checkpoint_path(tempdir) + adapter_config = _get_adapter_config(checkpoint_path) + _validate_adapter_config(adapter_config, "PROMPT_TUNING", pt_init_text) + + +invalid_params_map = [ + ("num_train_epochs", 0, "num_train_epochs has to be an integer/float >= 1"), + ( + "gradient_accumulation_steps", + 0, + "gradient_accumulation_steps has to be an integer >= 1", + ), +] + + +@pytest.mark.parametrize( + "param_name,param_val,exc_msg", + invalid_params_map, + ids=["num_train_epochs", "grad_acc_steps"], +) +def test_run_causallm_pt_invalid_params(param_name, param_val, exc_msg): + """Check if error is raised when invalid params are used to peft tune causallm models""" + with tempfile.TemporaryDirectory() as tempdir: + invalid_params = copy.deepcopy(BASE_PEFT_KWARGS) + invalid_params["output_dir"] = tempdir + invalid_params[param_name] = param_val + model_args, data_args, training_args, tune_config = causal_lm_train_kwargs( + invalid_params + ) + + with pytest.raises(ValueError, match=exc_msg): + sft_trainer.train(model_args, data_args, training_args, tune_config) + + def test_run_causallm_pt_with_validation(): """Check if we can bootstrap and peft tune causallm models with validation dataset""" with tempfile.TemporaryDirectory() as tempdir: @@ -154,28 +206,49 @@ def test_run_causallm_pt_with_validation(): assert data_args.validation_data_path == TWITTER_COMPLAINTS_DATA sft_trainer.train(model_args, data_args, training_args, tune_config) - _validate_training(tempdir) - - eval_loss_file_path = os.path.join(tempdir, "eval_loss.jsonl") - assert os.path.exists(eval_loss_file_path) - assert os.path.getsize(eval_loss_file_path) > 0 - - -def test_run_causallm_lora_and_inference(): + _validate_training(tempdir, check_eval=True) + + +############################# Lora Tests ############################# + +target_modules_val_map = [ + (None, ["q_proj", "v_proj"]), + ( + ["q_proj", "k_proj", "v_proj", "o_proj"], + ["q_proj", "k_proj", "v_proj", "o_proj"], + ), + ( + ["all-linear"], + ["o_proj", "q_proj", "gate_proj", "down_proj", "k_proj", "up_proj", "v_proj"], + ), +] + + +@pytest.mark.parametrize( + "target_modules,expected", + target_modules_val_map, + ids=["default", "custom_target_modules", "all_linear_target_modules"], +) +def test_run_causallm_lora_and_inference(request, target_modules, expected): """Check if we can bootstrap and lora tune causallm models""" with tempfile.TemporaryDirectory() as tempdir: - BASE_LORA_KWARGS["output_dir"] = tempdir + base_lora_kwargs = copy.deepcopy(BASE_LORA_KWARGS) + base_lora_kwargs["output_dir"] = tempdir + if "default" not in request._pyfuncitem.callspec.id: + base_lora_kwargs["target_modules"] = target_modules + model_args, data_args, training_args, tune_config = causal_lm_train_kwargs( - BASE_LORA_KWARGS + base_lora_kwargs ) sft_trainer.train(model_args, data_args, training_args, tune_config) # validate lora tuning configs _validate_training(tempdir) - checkpoint_path = os.path.join(tempdir, "checkpoint-5") + checkpoint_path = _get_checkpoint_path(tempdir) adapter_config = _get_adapter_config(checkpoint_path) - _validate_adapter_config(adapter_config, "LORA", BASE_LORA_KWARGS) - for module in ["q_proj", "v_proj"]: # default target_modules used + _validate_adapter_config(adapter_config, "LORA", base_lora_kwargs) + + for module in expected: assert module in adapter_config.get("target_modules") # Load the model @@ -189,55 +262,7 @@ def test_run_causallm_lora_and_inference(): assert "Simply put, the theory of relativity states that" in output_inference -def test_run_train_lora_target_modules(): - """Check runs lora tuning with given list of target modules.""" - with tempfile.TemporaryDirectory() as tempdir: - lora_target_modules = copy.deepcopy(BASE_LORA_KWARGS) - lora_target_modules["output_dir"] = tempdir - lora_target_modules["target_modules"] = ["q_proj", "k_proj", "v_proj", "o_proj"] - - model_args, data_args, training_args, tune_config = causal_lm_train_kwargs( - lora_target_modules - ) - sft_trainer.train(model_args, data_args, training_args, tune_config) - - # validate lora tuning configs - _validate_training(tempdir) - checkpoint_path = os.path.join(tempdir, "checkpoint-5") - adapter_config = _get_adapter_config(checkpoint_path) - _validate_adapter_config(adapter_config, "LORA", BASE_LORA_KWARGS) - for module in lora_target_modules["target_modules"]: - assert module in adapter_config.get("target_modules") - - -def test_run_train_lora_target_modules_all_linear(): - """Check runs lora tuning with all linear target modules.""" - with tempfile.TemporaryDirectory() as tempdir: - lora_target_modules = copy.deepcopy(BASE_LORA_KWARGS) - lora_target_modules["output_dir"] = tempdir - lora_target_modules["target_modules"] = ["all-linear"] - - model_args, data_args, training_args, tune_config = causal_lm_train_kwargs( - lora_target_modules - ) - sft_trainer.train(model_args, data_args, training_args, tune_config) - - # validate lora tuning configs - _validate_training(tempdir) - checkpoint_path = os.path.join(tempdir, "checkpoint-5") - adapter_config = _get_adapter_config(checkpoint_path) - _validate_adapter_config(adapter_config, "LORA", BASE_LORA_KWARGS) - llama_expected_modules = [ - "o_proj", - "q_proj", - "gate_proj", - "down_proj", - "k_proj", - "up_proj", - "v_proj", - ] - for module in llama_expected_modules: - assert module in adapter_config.get("target_modules") +############################# Finetuning Tests ############################# def test_run_causallm_ft_and_inference(): @@ -251,7 +276,7 @@ def test_run_causallm_ft_and_inference(): # validate ft tuning configs _validate_training(tempdir) - checkpoint_path = os.path.join(tempdir, "checkpoint-5") + checkpoint_path = _get_checkpoint_path(tempdir) adapter_config = _get_adapter_config(checkpoint_path) _validate_adapter_config(adapter_config, "PROMPT_TUNING", BASE_FT_KWARGS) @@ -266,12 +291,21 @@ def test_run_causallm_ft_and_inference(): assert "### Text: @NortonSupport Thanks much.\n\n### Label:" in output_inference -def _validate_training(tempdir): +def _validate_training(tempdir, check_eval=False): assert any(x.startswith("checkpoint-") for x in os.listdir(tempdir)) train_loss_file_path = "{}/train_loss.jsonl".format(tempdir) assert os.path.exists(train_loss_file_path) == True assert os.path.getsize(train_loss_file_path) > 0 + if check_eval: + eval_loss_file_path = os.path.join(tempdir, "eval_loss.jsonl") + assert os.path.exists(eval_loss_file_path) + assert os.path.getsize(eval_loss_file_path) > 0 + + +def _get_checkpoint_path(dir_path): + return os.path.join(dir_path, "checkpoint-5") + def _get_adapter_config(dir_path): with open(os.path.join(dir_path, "adapter_config.json")) as f: