From 77435877f41778fe99170876a26fbbdb82b36715 Mon Sep 17 00:00:00 2001 From: troyfeng116 Date: Fri, 21 Apr 2023 17:07:39 -0400 Subject: [PATCH 01/12] Prelimineary dataset tests --- .../datasets/spider_reader.py | 3 +- tests/consts.py | 126 ++++++++++++++++++ tests/test_datasets.py | 29 +++- 3 files changed, 156 insertions(+), 2 deletions(-) create mode 100644 tests/consts.py diff --git a/finetuning/lightning_modules/datasets/spider_reader.py b/finetuning/lightning_modules/datasets/spider_reader.py index e85d84ae..44d42d13 100644 --- a/finetuning/lightning_modules/datasets/spider_reader.py +++ b/finetuning/lightning_modules/datasets/spider_reader.py @@ -10,7 +10,8 @@ from finetuning.lightning_modules.datasets.base_reader import NL2CodeDataset, NL2CodeDataModule from preprocessing.preprocess_spider import decompose_sql, pd_df_from_dict -DB_INFO_FILE = os.path.join(os.path.dirname(__file__), '../../../data/squall/db_info_wtq.json') +# DB_INFO_FILE = os.path.join(os.path.dirname(__file__), '../../../data/squall/db_info_wtq.json') +DB_INFO_FILE = os.path.join(os.path.dirname(__file__), f"{os.environ['NLP4CODE_TEST_DATA_PATH']}/squall/db_info_wtq.json") with open(DB_INFO_FILE, "r") as f: full_db_info = json.load(f) diff --git a/tests/consts.py b/tests/consts.py new file mode 100644 index 00000000..815d669b --- /dev/null +++ b/tests/consts.py @@ -0,0 +1,126 @@ +import os +from typing import List, Dict, Tuple + +NLP4CODE_TEST_DATA_PATH = os.environ["NLP4CODE_TEST_DATA_PATH"] + + +from execution.executors import MathExecutor +from finetuning.lightning_modules.datasets.base_reader import ( + NL2CodeDataModule, + NL2CodeDataset, +) +from finetuning.lightning_modules.datasets.mathqa_reader import ( + FewShotMathQADataModule, + FewShotMathQADataset, + MathQADataset, + MathQADataModule, + MathQAEndVerificationDataset, + MathQAEndVerificationDataModule, +) +from finetuning.lightning_modules.datasets.mbpp_reader import ( + FewShotMBPPDataModule, + FewShotMBPPDataset, + MBPPEndVerificationDataModule, + MBPPEndVerificationDataset, +) +from finetuning.lightning_modules.datasets.spider_reader import ( + FewShotSpiderDataset, + FewShotSQLDataModule, + SpiderDataset, + SpiderEndVerificationDataset, + SQLEndVerificationDataModule, + Text2SqlDataModule, +) + +# TODO: use special test string for test transformer model name? +TEST_TRANSFORMER_MODEL_NAME = "EleutherAI/gpt-neo-125M" + +# ======== datasets ======== + +# TODO: better way to do this? (custom types for each kwargs?) +# list of (dataset, **init_kwargs) tuples +FEW_SHOT_DATASETS: List[Tuple[NL2CodeDataset, Dict]] = [ + ( + FewShotMathQADataset, + { + "prompt_file": "prompt_files/mathqa_non_idiomatic_code_init_val.txt", + "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, + "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mathqa/val_dedup_init_val.jsonl", + "mode": "test_few_shot", + }, + ), + ( + FewShotMBPPDataset, + { + "prompt_file": "prompt_files/mbpp_prompt_1_test.txt", + "add_assertion_n": 1, + "mode": "test_few_shot", + }, + ), + ( + FewShotSpiderDataset, + { + "prompt_file": "prompt_files/spider_codex_cot_sql_prompt_baseline_very_short.txt", + "mode": "test_few_shot", + }, + ), +] + + +DATASETS: List[Tuple[NL2CodeDataset, Dict]] = [ + ( + MathQADataset, + { + # "file_path": "data/mathqa/train-python.jsonl", + "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mathqa/train_dedup.jsonl", + "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, + # TODO: test different modes + "mode": "train", + }, + ), + ( + SpiderDataset, + { + "file_path": f"{NLP4CODE_TEST_DATA_PATH}/spider/train_spider_processed_v2.jsonl", + "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, + "mode": "train", + }, + ), +] + +# FEW_SHOT_DATA_MODULES: List[NL2CodeDataModule] = [ +# FewShotMathQADataModule, +# FewShotMBPPDataModule, +# FewShotSQLDataModule, +# ] + +# train_file_path: data/mathqa/train-python.jsonl +# val_file_path: data/mathqa/val-python.jsonl +DATA_MODULES: List[Tuple[NL2CodeDataModule, Dict]] = [ + ( + MathQADataModule, + { + "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, + "train_set_init_args": { + "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mathqa/train_dedup.jsonl" + }, + "val_set_init_args": { + "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mathqa/val_dedup.jsonl" + }, + }, + ), + ( + Text2SqlDataModule, + { + "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, + "train_set_init_args": { + "file_path": f"{NLP4CODE_TEST_DATA_PATH}/spider/train_spider_processed_v2.jsonl" + }, + "val_set_init_args": { + "file_path": f"{NLP4CODE_TEST_DATA_PATH}/spider/dev_processed.jsonl" + }, + "train_max_instances": 10, + "val_max_instances": 10, + }, + ), +] diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 384b7c60..4b1aa3b7 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -1,14 +1,41 @@ import unittest from os import path, sys +from typing import List, Tuple, Dict ROOT_DIR = path.dirname(path.dirname(path.abspath(__file__))) sys.path.append(ROOT_DIR) -from execution.executors import MathExecutor + +from tests.consts import DATA_MODULES, DATASETS, FEW_SHOT_DATASETS + + +# test cases to add: +# - test base_reader classes are abstract +# - test different modes (train, test, few_shot_test) class TestDatasets(unittest.TestCase): + # TODO: NotImplemented error testing + # def test_few_shot_datasets(self): + # for few_shot_dataset_cls, few_shot_dataset_init_kwargs in FEW_SHOT_DATASETS: + # few_shot_dataset = few_shot_dataset_cls(**few_shot_dataset_init_kwargs) + + def test_finetune_datasets(self): + for finetune_dataset_cls, finetune_dataset_init_kwargs in DATASETS: + finetune_dataset = finetune_dataset_cls(**finetune_dataset_init_kwargs) + + +class TestDataModules(unittest.TestCase): def test_gsmath(self): # TODO: this is dummy test self.assertTrue(True) + + def test_finetune_data_modules(self): + for finetune_data_module_cls, finetune_data_module_init_kwargs in DATA_MODULES: + finetune_data_module = finetune_data_module_cls( + **finetune_data_module_init_kwargs + ) + # train_dl = finetune_data_module.train_dataloader() + # val_dl = finetune_data_module.val_dataloader() + # print(type(train_dl)) From 0f7697e09dc40194c1a0ad4eccf77065248284ce Mon Sep 17 00:00:00 2001 From: troyfeng116 Date: Sun, 23 Apr 2023 21:56:32 -0400 Subject: [PATCH 02/12] Test data modules: dataloaders --- finetuning/lightning_modules/datasets/mathqa_reader.py | 3 ++- tests/test_datasets.py | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/finetuning/lightning_modules/datasets/mathqa_reader.py b/finetuning/lightning_modules/datasets/mathqa_reader.py index 70d7497d..94233836 100644 --- a/finetuning/lightning_modules/datasets/mathqa_reader.py +++ b/finetuning/lightning_modules/datasets/mathqa_reader.py @@ -137,7 +137,8 @@ def get_train_instance(self, example: Dict[str, Any]) -> List[Dict[str, Any]]: def get_test_instance(self, example: Dict[str, Any]) -> List[Dict[str, Any]]: # parse the answer and add the field example["original_answer"] = example["answer"] - example["answer"] = example["answer"].split("\n####")[-1].strip() + # TODO: in data/mathqa/val_dedup.jsonl, example["answer"] are floats + # example["answer"] = example["answer"].split("\n####")[-1].strip() return [self.get_example_dict(example, example["text"], "", train_mode=False)] diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 4b1aa3b7..d70e3f46 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -9,6 +9,8 @@ from tests.consts import DATA_MODULES, DATASETS, FEW_SHOT_DATASETS +from torch.utils.data import DataLoader + # test cases to add: # - test base_reader classes are abstract @@ -36,6 +38,7 @@ def test_finetune_data_modules(self): finetune_data_module = finetune_data_module_cls( **finetune_data_module_init_kwargs ) - # train_dl = finetune_data_module.train_dataloader() - # val_dl = finetune_data_module.val_dataloader() - # print(type(train_dl)) + train_dl = finetune_data_module.train_dataloader() + self.assertTrue(isinstance(train_dl, DataLoader)) + val_dl = finetune_data_module.val_dataloader() + self.assertTrue(isinstance(val_dl, DataLoader)) From 8615ac4758261abae75febc38e34d4842fd1fcb1 Mon Sep 17 00:00:00 2001 From: troyfeng116 Date: Sun, 23 Apr 2023 22:22:30 -0400 Subject: [PATCH 03/12] Update model testing with smaller models for more coverage --- .../models/seq2seq_model_util.py | 8 ++- tests/consts.py | 12 ++++- tests/test_models.py | 54 ++++++++++--------- 3 files changed, 46 insertions(+), 28 deletions(-) diff --git a/finetuning/lightning_modules/models/seq2seq_model_util.py b/finetuning/lightning_modules/models/seq2seq_model_util.py index 4397b150..35740158 100755 --- a/finetuning/lightning_modules/models/seq2seq_model_util.py +++ b/finetuning/lightning_modules/models/seq2seq_model_util.py @@ -83,14 +83,18 @@ def get_model(model_name: str, use_cache=not gradient_ckpt, **additional_init_args) elif model_name.startswith("Salesforce/codegen-"): + # TODO: using float32 here for tests + # RunTime error: "LayerNormKernelImpl" not implemented for 'Half' codegen + # https://github.com/huggingface/transformers/issues/21989 tokenizer = CodeGenTokenizer.from_pretrained(model_name, - additional_special_tokens=additional_special_tokens) + additional_special_tokens=additional_special_tokens, + torch_dtype=torch.float32) tokenizer.pad_token = tokenizer.eos_token if not tokenizer_only: model = CodeGenForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id, - torch_dtype=torch.float16, + torch_dtype=torch.float32, # device_map="auto", use_cache=True) if len(additional_special_tokens) > 0: diff --git a/tests/consts.py b/tests/consts.py index 815d669b..e76db7c1 100644 --- a/tests/consts.py +++ b/tests/consts.py @@ -94,8 +94,6 @@ # FewShotSQLDataModule, # ] -# train_file_path: data/mathqa/train-python.jsonl -# val_file_path: data/mathqa/val-python.jsonl DATA_MODULES: List[Tuple[NL2CodeDataModule, Dict]] = [ ( MathQADataModule, @@ -124,3 +122,13 @@ }, ), ] + +# ======== models ======== + +TEST_MODEL_TRANSFORMER_MODEL_NAMES = [ + "EleutherAI/gpt-neo-125M", + "Salesforce/codet5-small", + "Salesforce/codegen-350M-multi", +] + +TEST_MODEL_EXECUTOR_CLS = "execution.executors.MathExecutor" diff --git a/tests/test_models.py b/tests/test_models.py index ca4f567f..2dd99d4c 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -2,6 +2,8 @@ from os import path, sys +from tests.consts import TEST_MODEL_EXECUTOR_CLS, TEST_MODEL_TRANSFORMER_MODEL_NAMES + ROOT_DIR = path.dirname(path.dirname(path.abspath(__file__))) sys.path.append(ROOT_DIR) @@ -11,27 +13,31 @@ class TestModels(unittest.TestCase): - def test_gpt_neo(self): - model = Seq2SeqModel( - transformer_model_name="EleutherAI/gpt-neo-125M", - executor_cls="execution.executors.MathExecutor", - ) - - test_input_str = [ - "# write a python program that adds two integers", - "# write a python program that adds two integers", - ] - context_tokenizer_outputs = model.tokenizer(test_input_str, return_tensors="pt") - input_ids = context_tokenizer_outputs["input_ids"] - attention_mask = context_tokenizer_outputs["attention_mask"] - - generation_result = model.forward( - input_ids, - attention_mask=attention_mask, - metadata=[{"nl": test_input_str[0]}, {"nl": test_input_str[1]}], - ) - - self.assertEqual(len(generation_result), 2) - self.assertEqual( - all(["generated_program" in result for result in generation_result]), True - ) + def test_models(self): + for model_name in TEST_MODEL_TRANSFORMER_MODEL_NAMES: + model = Seq2SeqModel( + transformer_model_name=model_name, + executor_cls=TEST_MODEL_EXECUTOR_CLS, + ) + + test_input_str = [ + "# write a python program that adds two integers", + "# write a python program that adds two integers", + ] + context_tokenizer_outputs = model.tokenizer( + test_input_str, return_tensors="pt" + ) + input_ids = context_tokenizer_outputs["input_ids"] + attention_mask = context_tokenizer_outputs["attention_mask"] + + generation_result = model.forward( + input_ids, + attention_mask=attention_mask, + metadata=[{"nl": test_input_str[0]}, {"nl": test_input_str[1]}], + ) + + self.assertEqual(len(generation_result), 2) + self.assertEqual( + all(["generated_program" in result for result in generation_result]), + True, + ) From bd7294875073597e99d84035edb090faae9cd90e Mon Sep 17 00:00:00 2001 From: troyfeng116 Date: Mon, 24 Apr 2023 11:38:18 -0400 Subject: [PATCH 04/12] Remove actions CUDA_VISIBLE_DEVICES --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e612d423..e114362b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -60,9 +60,9 @@ jobs: # runs-on: ubuntu-22.04 # do not expose sensitive environment variables in this yaml - env: + # env: # TODO: dynamically choose GPUs if tests end up using them - CUDA_VISIBLE_DEVICES: 1 + # CUDA_VISIBLE_DEVICES: 1 steps: - uses: actions/checkout@v2 From 1f1b5f300bd97faa8a19be9e1b30b2384f7a4378 Mon Sep 17 00:00:00 2001 From: troyfeng116 Date: Fri, 28 Apr 2023 14:25:33 -0400 Subject: [PATCH 05/12] Fix imports (SpiderDataset promptify broken --- tests/consts.py | 80 +++++++++++++++++++++--------------------- tests/test_datasets.py | 21 +++++------ 2 files changed, 51 insertions(+), 50 deletions(-) diff --git a/tests/consts.py b/tests/consts.py index e76db7c1..340d0ec7 100644 --- a/tests/consts.py +++ b/tests/consts.py @@ -6,30 +6,30 @@ from execution.executors import MathExecutor from finetuning.lightning_modules.datasets.base_reader import ( - NL2CodeDataModule, + # NL2CodeDataModule, NL2CodeDataset, ) from finetuning.lightning_modules.datasets.mathqa_reader import ( - FewShotMathQADataModule, + # FewShotMathQADataModule, FewShotMathQADataset, MathQADataset, - MathQADataModule, - MathQAEndVerificationDataset, - MathQAEndVerificationDataModule, + # MathQADataModule, + # MathQAEndVerificationDataset, + # MathQAEndVerificationDataModule, ) from finetuning.lightning_modules.datasets.mbpp_reader import ( - FewShotMBPPDataModule, + # FewShotMBPPDataModule, FewShotMBPPDataset, - MBPPEndVerificationDataModule, - MBPPEndVerificationDataset, + # MBPPEndVerificationDataModule, + # MBPPEndVerificationDataset, ) from finetuning.lightning_modules.datasets.spider_reader import ( FewShotSpiderDataset, - FewShotSQLDataModule, + # FewShotSQLDataModule, SpiderDataset, - SpiderEndVerificationDataset, - SQLEndVerificationDataModule, - Text2SqlDataModule, + # SpiderEndVerificationDataset, + # SQLEndVerificationDataModule, + # Text2SqlDataModule, ) # TODO: use special test string for test transformer model name? @@ -94,34 +94,34 @@ # FewShotSQLDataModule, # ] -DATA_MODULES: List[Tuple[NL2CodeDataModule, Dict]] = [ - ( - MathQADataModule, - { - "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, - "train_set_init_args": { - "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mathqa/train_dedup.jsonl" - }, - "val_set_init_args": { - "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mathqa/val_dedup.jsonl" - }, - }, - ), - ( - Text2SqlDataModule, - { - "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, - "train_set_init_args": { - "file_path": f"{NLP4CODE_TEST_DATA_PATH}/spider/train_spider_processed_v2.jsonl" - }, - "val_set_init_args": { - "file_path": f"{NLP4CODE_TEST_DATA_PATH}/spider/dev_processed.jsonl" - }, - "train_max_instances": 10, - "val_max_instances": 10, - }, - ), -] +# DATA_MODULES: List[Tuple[NL2CodeDataModule, Dict]] = [ +# ( +# MathQADataModule, +# { +# "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, +# "train_set_init_args": { +# "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mathqa/train_dedup.jsonl" +# }, +# "val_set_init_args": { +# "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mathqa/val_dedup.jsonl" +# }, +# }, +# ), +# ( +# Text2SqlDataModule, +# { +# "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, +# "train_set_init_args": { +# "file_path": f"{NLP4CODE_TEST_DATA_PATH}/spider/train_spider_processed_v2.jsonl" +# }, +# "val_set_init_args": { +# "file_path": f"{NLP4CODE_TEST_DATA_PATH}/spider/dev_processed.jsonl" +# }, +# "train_max_instances": 10, +# "val_max_instances": 10, +# }, +# ), +# ] # ======== models ======== diff --git a/tests/test_datasets.py b/tests/test_datasets.py index d70e3f46..c1c41838 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -7,7 +7,8 @@ sys.path.append(ROOT_DIR) -from tests.consts import DATA_MODULES, DATASETS, FEW_SHOT_DATASETS +# from tests.consts import DATA_MODULES, DATASETS, FEW_SHOT_DATASETS +from tests.consts import DATASETS, FEW_SHOT_DATASETS from torch.utils.data import DataLoader @@ -33,12 +34,12 @@ def test_gsmath(self): # TODO: this is dummy test self.assertTrue(True) - def test_finetune_data_modules(self): - for finetune_data_module_cls, finetune_data_module_init_kwargs in DATA_MODULES: - finetune_data_module = finetune_data_module_cls( - **finetune_data_module_init_kwargs - ) - train_dl = finetune_data_module.train_dataloader() - self.assertTrue(isinstance(train_dl, DataLoader)) - val_dl = finetune_data_module.val_dataloader() - self.assertTrue(isinstance(val_dl, DataLoader)) + # def test_finetune_data_modules(self): + # for finetune_data_module_cls, finetune_data_module_init_kwargs in DATA_MODULES: + # finetune_data_module = finetune_data_module_cls( + # **finetune_data_module_init_kwargs + # ) + # train_dl = finetune_data_module.train_dataloader() + # self.assertTrue(isinstance(train_dl, DataLoader)) + # val_dl = finetune_data_module.val_dataloader() + # self.assertTrue(isinstance(val_dl, DataLoader)) From 1e323899f4fd7b2554b4076becbeb4a9bc514f9d Mon Sep 17 00:00:00 2001 From: troyfeng116 Date: Fri, 28 Apr 2023 21:11:09 -0400 Subject: [PATCH 06/12] pass dataset tests --- tests/consts.py | 38 ++++++++++++++++++++++---------------- tests/test_datasets.py | 8 +++++--- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/tests/consts.py b/tests/consts.py index 340d0ec7..c37a2552 100644 --- a/tests/consts.py +++ b/tests/consts.py @@ -32,36 +32,41 @@ # Text2SqlDataModule, ) -# TODO: use special test string for test transformer model name? +# TODO: use special test string for test transformer model name? (don't load model) TEST_TRANSFORMER_MODEL_NAME = "EleutherAI/gpt-neo-125M" # ======== datasets ======== # TODO: better way to do this? (custom types for each kwargs?) +# TODO: make sure to keep dataset files up to date here # list of (dataset, **init_kwargs) tuples FEW_SHOT_DATASETS: List[Tuple[NL2CodeDataset, Dict]] = [ ( FewShotMathQADataset, { - "prompt_file": "prompt_files/mathqa_non_idiomatic_code_init_val.txt", + "exemplar_file_path": "prompt_files/mathqa-non_idiomatic_code-annotated-8_exemplars.jsonl", "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mathqa/val_dedup_init_val.jsonl", - "mode": "test_few_shot", + "mode": "test", }, ), ( FewShotMBPPDataset, { - "prompt_file": "prompt_files/mbpp_prompt_1_test.txt", - "add_assertion_n": 1, - "mode": "test_few_shot", + "exemplar_file_path": "prompt_files/mbpp-official_first_3-10_exemplars.jsonl", + # "add_assertion_n": 1, + "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, + "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mbpp/mbpp_prompt.jsonl", + "mode": "test", }, ), ( FewShotSpiderDataset, { - "prompt_file": "prompt_files/spider_codex_cot_sql_prompt_baseline_very_short.txt", - "mode": "test_few_shot", + "exemplar_file_path": "prompt_files/spider-8_exemplars.jsonl", + "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, + "file_path": f"{NLP4CODE_TEST_DATA_PATH}/spider/train_spider_processed_v2.jsonl", + "mode": "test", }, ), ] @@ -78,14 +83,15 @@ "mode": "train", }, ), - ( - SpiderDataset, - { - "file_path": f"{NLP4CODE_TEST_DATA_PATH}/spider/train_spider_processed_v2.jsonl", - "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, - "mode": "train", - }, - ), + # TODO: SpiderDataset prompt_function + # ( + # SpiderDataset, + # { + # "file_path": f"{NLP4CODE_TEST_DATA_PATH}/spider/train_spider_processed_v2.jsonl", + # "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, + # "mode": "train", + # }, + # ), ] # FEW_SHOT_DATA_MODULES: List[NL2CodeDataModule] = [ diff --git a/tests/test_datasets.py b/tests/test_datasets.py index c1c41838..1a87be87 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -20,9 +20,11 @@ class TestDatasets(unittest.TestCase): # TODO: NotImplemented error testing - # def test_few_shot_datasets(self): - # for few_shot_dataset_cls, few_shot_dataset_init_kwargs in FEW_SHOT_DATASETS: - # few_shot_dataset = few_shot_dataset_cls(**few_shot_dataset_init_kwargs) + def test_few_shot_datasets(self): + for few_shot_dataset_cls, few_shot_dataset_init_kwargs in FEW_SHOT_DATASETS: + few_shot_dataset = few_shot_dataset_cls( + **few_shot_dataset_init_kwargs, + ) def test_finetune_datasets(self): for finetune_dataset_cls, finetune_dataset_init_kwargs in DATASETS: From be807769c59d05223d0c797ed53db63810bcfbcd Mon Sep 17 00:00:00 2001 From: troyfeng116 Date: Fri, 28 Apr 2023 23:10:49 -0400 Subject: [PATCH 07/12] Test data modules --- tests/consts.py | 62 ++++++++++++++++++++++++++++-------------- tests/test_datasets.py | 39 ++++++++++++++++++++------ 2 files changed, 72 insertions(+), 29 deletions(-) diff --git a/tests/consts.py b/tests/consts.py index c37a2552..72103b53 100644 --- a/tests/consts.py +++ b/tests/consts.py @@ -4,32 +4,19 @@ NLP4CODE_TEST_DATA_PATH = os.environ["NLP4CODE_TEST_DATA_PATH"] -from execution.executors import MathExecutor from finetuning.lightning_modules.datasets.base_reader import ( - # NL2CodeDataModule, NL2CodeDataset, ) from finetuning.lightning_modules.datasets.mathqa_reader import ( - # FewShotMathQADataModule, FewShotMathQADataset, MathQADataset, - # MathQADataModule, - # MathQAEndVerificationDataset, - # MathQAEndVerificationDataModule, ) from finetuning.lightning_modules.datasets.mbpp_reader import ( - # FewShotMBPPDataModule, FewShotMBPPDataset, - # MBPPEndVerificationDataModule, - # MBPPEndVerificationDataset, ) from finetuning.lightning_modules.datasets.spider_reader import ( FewShotSpiderDataset, - # FewShotSQLDataModule, SpiderDataset, - # SpiderEndVerificationDataset, - # SQLEndVerificationDataModule, - # Text2SqlDataModule, ) # TODO: use special test string for test transformer model name? (don't load model) @@ -56,7 +43,7 @@ "exemplar_file_path": "prompt_files/mbpp-official_first_3-10_exemplars.jsonl", # "add_assertion_n": 1, "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, - "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mbpp/mbpp_prompt.jsonl", + "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mbpp/mbpp_test.jsonl", "mode": "test", }, ), @@ -65,7 +52,17 @@ { "exemplar_file_path": "prompt_files/spider-8_exemplars.jsonl", "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, - "file_path": f"{NLP4CODE_TEST_DATA_PATH}/spider/train_spider_processed_v2.jsonl", + "file_path": f"{NLP4CODE_TEST_DATA_PATH}/spider/dev_processed_db_path.jsonl", + "mode": "test", + }, + ), + ( + FewShotSpiderDataset, + { + "exemplar_file_path": "prompt_files/wtq-8_exemplars.jsonl", + "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, + # TODO: why does wtq_restored_dev.jsonl error + "file_path": f"{NLP4CODE_TEST_DATA_PATH}/squall/wtq_restored_test.jsonl", "mode": "test", }, ), @@ -94,11 +91,36 @@ # ), ] -# FEW_SHOT_DATA_MODULES: List[NL2CodeDataModule] = [ -# FewShotMathQADataModule, -# FewShotMBPPDataModule, -# FewShotSQLDataModule, -# ] +DATA_MODULE_INIT_ARGS: List[Tuple[str, Dict]] = [ + ( + "FewShotMathQADataset", + { + "exemplar_file_path": "prompt_files/mathqa-non_idiomatic_code-annotated-8_exemplars.jsonl", + "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, + "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mathqa/val_dedup_init_val.jsonl", + "mode": "test", + }, + ), + ( + "FewShotMBPPDataset", + { + "exemplar_file_path": "prompt_files/mbpp-official_first_3-10_exemplars.jsonl", + # "add_assertion_n": 1, + "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, + "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mbpp/mbpp_test.jsonl", + "mode": "test", + }, + ), + ( + "FewShotSpiderDataset", + { + "exemplar_file_path": "prompt_files/spider-8_exemplars.jsonl", + "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, + "file_path": f"{NLP4CODE_TEST_DATA_PATH}/spider/dev_processed_db_path.jsonl", + "mode": "test", + }, + ), +] # DATA_MODULES: List[Tuple[NL2CodeDataModule, Dict]] = [ # ( diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 1a87be87..42d58e23 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -12,6 +12,11 @@ from torch.utils.data import DataLoader +from finetuning.lightning_modules.datasets.base_datamodule import ( + NL2CodeDataModule, + FewShotNL2CodeDataModule, +) + # test cases to add: # - test base_reader classes are abstract @@ -36,12 +41,28 @@ def test_gsmath(self): # TODO: this is dummy test self.assertTrue(True) - # def test_finetune_data_modules(self): - # for finetune_data_module_cls, finetune_data_module_init_kwargs in DATA_MODULES: - # finetune_data_module = finetune_data_module_cls( - # **finetune_data_module_init_kwargs - # ) - # train_dl = finetune_data_module.train_dataloader() - # self.assertTrue(isinstance(train_dl, DataLoader)) - # val_dl = finetune_data_module.val_dataloader() - # self.assertTrue(isinstance(val_dl, DataLoader)) + def test_few_shot_data_modules(self): + for few_shot_dataset_cls, few_shot_dataset_init_kwargs in FEW_SHOT_DATASETS: + few_shot_dataset_cls_str = few_shot_dataset_cls.__name__ + + few_shot_dataset_init_kwargs = few_shot_dataset_init_kwargs.copy() + few_shot_dataset_init_kwargs[ + "val_file_path" + ] = few_shot_dataset_init_kwargs["file_path"] + few_shot_dataset_init_kwargs["batch_size"] = 1 + few_shot_dataset_init_kwargs["val_batch_size"] = 1 + + del few_shot_dataset_init_kwargs["file_path"] + del few_shot_dataset_init_kwargs["mode"] + + few_shot_data_module = FewShotNL2CodeDataModule( + # dataset_cls=f"finetuning.lightning_modules.datasets.{few_shot_dataset_cls_str}", + dataset_cls=few_shot_dataset_cls_str, + **few_shot_dataset_init_kwargs, + ) + + # no train_dataloader on few shot data module + with self.assertRaises(NotImplementedError): + train_dl = few_shot_data_module.train_dataloader() + val_dl = few_shot_data_module.val_dataloader() + self.assertTrue(isinstance(val_dl, DataLoader)) From e97d87734eb9d11f534b8c1bc4baa1f289f4f39b Mon Sep 17 00:00:00 2001 From: troyfeng116 Date: Fri, 28 Apr 2023 23:40:28 -0400 Subject: [PATCH 08/12] Cleanup --- tests/consts.py | 80 ++++++++++-------------------------------- tests/test_datasets.py | 58 +++++++++++++++++++++--------- 2 files changed, 61 insertions(+), 77 deletions(-) diff --git a/tests/consts.py b/tests/consts.py index 72103b53..6e84ee6d 100644 --- a/tests/consts.py +++ b/tests/consts.py @@ -1,5 +1,5 @@ import os -from typing import List, Dict, Tuple +from typing import List, Dict, Tuple, Optional NLP4CODE_TEST_DATA_PATH = os.environ["NLP4CODE_TEST_DATA_PATH"] @@ -24,6 +24,24 @@ # ======== datasets ======== + +class TestFewShotDatasetInitKwargs: + exemplar_file_path: str + transformer_model_name: str + file_path: str + mode: str = "test" + + def __init__( + self, + exemplar_file_path: str, + file_path: str, + transformer_model_name: Optional[str] = TEST_TRANSFORMER_MODEL_NAME, + ): + self.exemplar_file_path = exemplar_file_path + self.file_path = file_path + self.transformer_model_name = transformer_model_name + + # TODO: better way to do this? (custom types for each kwargs?) # TODO: make sure to keep dataset files up to date here # list of (dataset, **init_kwargs) tuples @@ -91,66 +109,6 @@ # ), ] -DATA_MODULE_INIT_ARGS: List[Tuple[str, Dict]] = [ - ( - "FewShotMathQADataset", - { - "exemplar_file_path": "prompt_files/mathqa-non_idiomatic_code-annotated-8_exemplars.jsonl", - "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, - "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mathqa/val_dedup_init_val.jsonl", - "mode": "test", - }, - ), - ( - "FewShotMBPPDataset", - { - "exemplar_file_path": "prompt_files/mbpp-official_first_3-10_exemplars.jsonl", - # "add_assertion_n": 1, - "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, - "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mbpp/mbpp_test.jsonl", - "mode": "test", - }, - ), - ( - "FewShotSpiderDataset", - { - "exemplar_file_path": "prompt_files/spider-8_exemplars.jsonl", - "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, - "file_path": f"{NLP4CODE_TEST_DATA_PATH}/spider/dev_processed_db_path.jsonl", - "mode": "test", - }, - ), -] - -# DATA_MODULES: List[Tuple[NL2CodeDataModule, Dict]] = [ -# ( -# MathQADataModule, -# { -# "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, -# "train_set_init_args": { -# "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mathqa/train_dedup.jsonl" -# }, -# "val_set_init_args": { -# "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mathqa/val_dedup.jsonl" -# }, -# }, -# ), -# ( -# Text2SqlDataModule, -# { -# "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, -# "train_set_init_args": { -# "file_path": f"{NLP4CODE_TEST_DATA_PATH}/spider/train_spider_processed_v2.jsonl" -# }, -# "val_set_init_args": { -# "file_path": f"{NLP4CODE_TEST_DATA_PATH}/spider/dev_processed.jsonl" -# }, -# "train_max_instances": 10, -# "val_max_instances": 10, -# }, -# ), -# ] - # ======== models ======== TEST_MODEL_TRANSFORMER_MODEL_NAMES = [ diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 42d58e23..8e179123 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -36,29 +36,37 @@ def test_finetune_datasets(self): finetune_dataset = finetune_dataset_cls(**finetune_dataset_init_kwargs) -class TestDataModules(unittest.TestCase): - def test_gsmath(self): - # TODO: this is dummy test - self.assertTrue(True) +def create_data_module_init_kwargs( + dataset_init_kwargs: Dict, is_few_shot: bool +) -> Dict: + data_module_init_kwargs = dataset_init_kwargs.copy() + + data_module_init_kwargs["batch_size"] = 1 + + data_module_init_kwargs["val_file_path"] = data_module_init_kwargs["file_path"] + data_module_init_kwargs["val_batch_size"] = 1 + if not is_few_shot: + data_module_init_kwargs["train_file_path"] = data_module_init_kwargs[ + "file_path" + ] + + del data_module_init_kwargs["file_path"] + del data_module_init_kwargs["mode"] + return data_module_init_kwargs + +class TestDataModules(unittest.TestCase): def test_few_shot_data_modules(self): + # instantiate each few shot dataset as part of a data module for few_shot_dataset_cls, few_shot_dataset_init_kwargs in FEW_SHOT_DATASETS: few_shot_dataset_cls_str = few_shot_dataset_cls.__name__ - - few_shot_dataset_init_kwargs = few_shot_dataset_init_kwargs.copy() - few_shot_dataset_init_kwargs[ - "val_file_path" - ] = few_shot_dataset_init_kwargs["file_path"] - few_shot_dataset_init_kwargs["batch_size"] = 1 - few_shot_dataset_init_kwargs["val_batch_size"] = 1 - - del few_shot_dataset_init_kwargs["file_path"] - del few_shot_dataset_init_kwargs["mode"] + few_shot_data_module_init_kwargs = create_data_module_init_kwargs( + few_shot_dataset_init_kwargs, True + ) few_shot_data_module = FewShotNL2CodeDataModule( - # dataset_cls=f"finetuning.lightning_modules.datasets.{few_shot_dataset_cls_str}", dataset_cls=few_shot_dataset_cls_str, - **few_shot_dataset_init_kwargs, + **few_shot_data_module_init_kwargs, ) # no train_dataloader on few shot data module @@ -66,3 +74,21 @@ def test_few_shot_data_modules(self): train_dl = few_shot_data_module.train_dataloader() val_dl = few_shot_data_module.val_dataloader() self.assertTrue(isinstance(val_dl, DataLoader)) + + def test_finetune_data_modules(self): + # instantiate each few shot dataset as part of a data module + for finetune_dataset_cls, finetune_dataset_init_kwargs in DATASETS: + finetune_dataset_cls_str = finetune_dataset_cls.__name__ + finetune_data_module_init_kwargs = create_data_module_init_kwargs( + finetune_dataset_init_kwargs, False + ) + + finetune_data_module = NL2CodeDataModule( + dataset_cls=finetune_dataset_cls_str, + **finetune_data_module_init_kwargs, + ) + + train_dl = finetune_data_module.train_dataloader() + self.assertTrue(isinstance(train_dl, DataLoader)) + val_dl = finetune_data_module.val_dataloader() + self.assertTrue(isinstance(val_dl, DataLoader)) From c82d2d9e2ba1fb4028d3569cdb7d2bc9b5377aed Mon Sep 17 00:00:00 2001 From: troyfeng116 Date: Sat, 29 Apr 2023 00:10:43 -0400 Subject: [PATCH 09/12] Add kwargs classes for type control --- tests/consts.py | 115 ++++++++++++++++++++++------------------- tests/test_datasets.py | 41 ++++++++++----- 2 files changed, 90 insertions(+), 66 deletions(-) diff --git a/tests/consts.py b/tests/consts.py index 6e84ee6d..6d76844b 100644 --- a/tests/consts.py +++ b/tests/consts.py @@ -5,6 +5,7 @@ from finetuning.lightning_modules.datasets.base_reader import ( + FewShotNL2CodeDataset, NL2CodeDataset, ) from finetuning.lightning_modules.datasets.mathqa_reader import ( @@ -25,90 +26,96 @@ # ======== datasets ======== -class TestFewShotDatasetInitKwargs: - exemplar_file_path: str +# defines kwargs needed to initialize NL2CodeDataset +class TestDatasetInitKwargs: transformer_model_name: str file_path: str - mode: str = "test" + mode: str def __init__( self, - exemplar_file_path: str, file_path: str, + mode: Optional[str] = "train", # default to train transformer_model_name: Optional[str] = TEST_TRANSFORMER_MODEL_NAME, ): - self.exemplar_file_path = exemplar_file_path self.file_path = file_path + self.mode = mode self.transformer_model_name = transformer_model_name +DATASETS: List[Tuple[NL2CodeDataset, TestDatasetInitKwargs]] = [ + ( + MathQADataset, + TestDatasetInitKwargs( + file_path=f"{NLP4CODE_TEST_DATA_PATH}/mathqa/train_dedup.jsonl", + ), + ), + # TODO: SpiderDataset prompt_function + # ( + # SpiderDataset, + # TestDatasetInitKwargs( + # file_path=f"{NLP4CODE_TEST_DATA_PATH}/spider/train_spider_processed_v2.jsonl", + # ), + # ), +] + + +# defines kwargs needed to instantiate FewShotNL2CodeDataset +class TestFewShotDatasetInitKwargs(TestDatasetInitKwargs): + transformer_model_name: str + file_path: str + exemplar_file_path: str + mode: str = "test" + + def __init__( + self, + file_path: str, + exemplar_file_path: str, + transformer_model_name: Optional[str] = TEST_TRANSFORMER_MODEL_NAME, + ): + super().__init__( + file_path=file_path, + transformer_model_name=transformer_model_name, + mode="test", + ) + self.exemplar_file_path = exemplar_file_path + + # TODO: better way to do this? (custom types for each kwargs?) # TODO: make sure to keep dataset files up to date here # list of (dataset, **init_kwargs) tuples -FEW_SHOT_DATASETS: List[Tuple[NL2CodeDataset, Dict]] = [ +FEW_SHOT_DATASETS: List[Tuple[FewShotNL2CodeDataset, TestFewShotDatasetInitKwargs]] = [ ( FewShotMathQADataset, - { - "exemplar_file_path": "prompt_files/mathqa-non_idiomatic_code-annotated-8_exemplars.jsonl", - "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, - "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mathqa/val_dedup_init_val.jsonl", - "mode": "test", - }, + TestFewShotDatasetInitKwargs( + exemplar_file_path="prompt_files/mathqa-non_idiomatic_code-annotated-8_exemplars.jsonl", + file_path=f"{NLP4CODE_TEST_DATA_PATH}/mathqa/val_dedup_init_val.jsonl", + ), ), ( FewShotMBPPDataset, - { - "exemplar_file_path": "prompt_files/mbpp-official_first_3-10_exemplars.jsonl", - # "add_assertion_n": 1, - "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, - "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mbpp/mbpp_test.jsonl", - "mode": "test", - }, + TestFewShotDatasetInitKwargs( + exemplar_file_path="prompt_files/mbpp-official_first_3-10_exemplars.jsonl", + file_path=f"{NLP4CODE_TEST_DATA_PATH}/mbpp/mbpp_test.jsonl", + ), ), ( FewShotSpiderDataset, - { - "exemplar_file_path": "prompt_files/spider-8_exemplars.jsonl", - "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, - "file_path": f"{NLP4CODE_TEST_DATA_PATH}/spider/dev_processed_db_path.jsonl", - "mode": "test", - }, + TestFewShotDatasetInitKwargs( + exemplar_file_path="prompt_files/spider-8_exemplars.jsonl", + file_path=f"{NLP4CODE_TEST_DATA_PATH}/spider/dev_processed_db_path.jsonl", + ), ), ( FewShotSpiderDataset, - { - "exemplar_file_path": "prompt_files/wtq-8_exemplars.jsonl", - "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, + TestFewShotDatasetInitKwargs( + exemplar_file_path="prompt_files/wtq-8_exemplars.jsonl", # TODO: why does wtq_restored_dev.jsonl error - "file_path": f"{NLP4CODE_TEST_DATA_PATH}/squall/wtq_restored_test.jsonl", - "mode": "test", - }, + file_path=f"{NLP4CODE_TEST_DATA_PATH}/squall/wtq_restored_test.jsonl", + ), ), ] - -DATASETS: List[Tuple[NL2CodeDataset, Dict]] = [ - ( - MathQADataset, - { - # "file_path": "data/mathqa/train-python.jsonl", - "file_path": f"{NLP4CODE_TEST_DATA_PATH}/mathqa/train_dedup.jsonl", - "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, - # TODO: test different modes - "mode": "train", - }, - ), - # TODO: SpiderDataset prompt_function - # ( - # SpiderDataset, - # { - # "file_path": f"{NLP4CODE_TEST_DATA_PATH}/spider/train_spider_processed_v2.jsonl", - # "transformer_model_name": TEST_TRANSFORMER_MODEL_NAME, - # "mode": "train", - # }, - # ), -] - # ======== models ======== TEST_MODEL_TRANSFORMER_MODEL_NAMES = [ diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 8e179123..859ad623 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -8,7 +8,7 @@ # from tests.consts import DATA_MODULES, DATASETS, FEW_SHOT_DATASETS -from tests.consts import DATASETS, FEW_SHOT_DATASETS +from tests.consts import DATASETS, FEW_SHOT_DATASETS, TestDatasetInitKwargs from torch.utils.data import DataLoader @@ -27,38 +27,52 @@ class TestDatasets(unittest.TestCase): # TODO: NotImplemented error testing def test_few_shot_datasets(self): for few_shot_dataset_cls, few_shot_dataset_init_kwargs in FEW_SHOT_DATASETS: + print( + f"\n======== testing few-shot dataset {few_shot_dataset_cls.__name__} ========" + ) few_shot_dataset = few_shot_dataset_cls( - **few_shot_dataset_init_kwargs, + **vars(few_shot_dataset_init_kwargs), ) def test_finetune_datasets(self): for finetune_dataset_cls, finetune_dataset_init_kwargs in DATASETS: - finetune_dataset = finetune_dataset_cls(**finetune_dataset_init_kwargs) + print( + f"\n======== testing finetune dataset {finetune_dataset_cls.__name__} ========" + ) + finetune_dataset = finetune_dataset_cls( + **vars(finetune_dataset_init_kwargs) + ) def create_data_module_init_kwargs( - dataset_init_kwargs: Dict, is_few_shot: bool + dataset_init_kwargs: TestDatasetInitKwargs, is_few_shot: bool ) -> Dict: - data_module_init_kwargs = dataset_init_kwargs.copy() + dataset_init_kwargs_dict = vars(dataset_init_kwargs) + data_module_init_kwargs_dict = dataset_init_kwargs_dict.copy() - data_module_init_kwargs["batch_size"] = 1 + data_module_init_kwargs_dict["batch_size"] = 1 - data_module_init_kwargs["val_file_path"] = data_module_init_kwargs["file_path"] - data_module_init_kwargs["val_batch_size"] = 1 + data_module_init_kwargs_dict["val_file_path"] = data_module_init_kwargs_dict[ + "file_path" + ] + data_module_init_kwargs_dict["val_batch_size"] = 1 if not is_few_shot: - data_module_init_kwargs["train_file_path"] = data_module_init_kwargs[ + data_module_init_kwargs_dict["train_file_path"] = data_module_init_kwargs_dict[ "file_path" ] - del data_module_init_kwargs["file_path"] - del data_module_init_kwargs["mode"] - return data_module_init_kwargs + del data_module_init_kwargs_dict["file_path"] + del data_module_init_kwargs_dict["mode"] + return data_module_init_kwargs_dict class TestDataModules(unittest.TestCase): def test_few_shot_data_modules(self): # instantiate each few shot dataset as part of a data module for few_shot_dataset_cls, few_shot_dataset_init_kwargs in FEW_SHOT_DATASETS: + print( + f"\n======== testing few-shot DataModule with {few_shot_dataset_cls.__name__} ========" + ) few_shot_dataset_cls_str = few_shot_dataset_cls.__name__ few_shot_data_module_init_kwargs = create_data_module_init_kwargs( few_shot_dataset_init_kwargs, True @@ -78,6 +92,9 @@ def test_few_shot_data_modules(self): def test_finetune_data_modules(self): # instantiate each few shot dataset as part of a data module for finetune_dataset_cls, finetune_dataset_init_kwargs in DATASETS: + print( + f"\n======== testing finetune DataModule with {finetune_dataset_cls.__name__} ========" + ) finetune_dataset_cls_str = finetune_dataset_cls.__name__ finetune_data_module_init_kwargs = create_data_module_init_kwargs( finetune_dataset_init_kwargs, False From ab7e4cd0b7015d7f8f8943710acc4aa49a6c3e43 Mon Sep 17 00:00:00 2001 From: troyfeng116 Date: Sat, 29 Apr 2023 00:11:02 -0400 Subject: [PATCH 10/12] verbose test logs --- .github/workflows/main.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e114362b..4bcc07cb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -129,19 +129,19 @@ jobs: run: | . .venv/bin/activate ls -la - python -m unittest tests/test_datasets.py + python -m unittest -v tests/test_datasets.py - name: Test executors run: | . .venv/bin/activate - python -m unittest tests/test_executors.py + python -m unittest -v tests/test_executors.py - name: Test inference pipeline run: | . .venv/bin/activate - python -m unittest tests/test_inference_pipeline.py + python -m unittest -v tests/test_inference_pipeline.py - name: Test models run: | . .venv/bin/activate - python -m unittest tests/test_models.py \ No newline at end of file + python -m unittest -v tests/test_models.py \ No newline at end of file From 727ac551d78ba718459b76b244cbfe1857d66da5 Mon Sep 17 00:00:00 2001 From: troyfeng116 Date: Mon, 1 May 2023 13:12:54 -0400 Subject: [PATCH 11/12] Expand executor test suite --- tests/consts.py | 62 +++++++++++++++++++++++++++++++++++++++-- tests/test_executors.py | 20 +++++++++++++ 2 files changed, 80 insertions(+), 2 deletions(-) diff --git a/tests/consts.py b/tests/consts.py index 6d76844b..788d889e 100644 --- a/tests/consts.py +++ b/tests/consts.py @@ -20,11 +20,20 @@ SpiderDataset, ) +from execution.executors import ( + BaseExecutor, + MathExecutor, + MBPPExecutor, + SpiderExecutor, + WTQExecutor, +) + + # TODO: use special test string for test transformer model name? (don't load model) TEST_TRANSFORMER_MODEL_NAME = "EleutherAI/gpt-neo-125M" -# ======== datasets ======== +# ======== datasets ======== # defines kwargs needed to initialize NL2CodeDataset class TestDatasetInitKwargs: @@ -116,12 +125,61 @@ def __init__( ), ] + # ======== models ======== -TEST_MODEL_TRANSFORMER_MODEL_NAMES = [ +TEST_MODEL_TRANSFORMER_MODEL_NAMES: List[str] = [ "EleutherAI/gpt-neo-125M", "Salesforce/codet5-small", "Salesforce/codegen-350M-multi", ] TEST_MODEL_EXECUTOR_CLS = "execution.executors.MathExecutor" + + +# ======== executors ======== + +TEST_PROGRAM = "answer = 5" + +# Tuple[ExecutorCls, program, example] +TEST_EXECUTORS: List[Tuple[BaseExecutor, str, Dict]] = [ + ( + MathExecutor, + TEST_PROGRAM, + { + "question": "some question", + "answer": 5, + }, + ), + ( + MBPPExecutor, + TEST_PROGRAM, + { + "question": "some question", + "answer": 5, + "code": "return 5", + "task_id": "xyz", + "test_setup_code": 'print("setup")', + "test_list": ["assert 1+1 == 2", "assert 1+1 != 3"], + }, + ), + ( + SpiderExecutor, + TEST_PROGRAM, + { + "question": "some question", + "db_id": "my_db_id", + "query": "SELECT * FROM table", + }, + ), + ( + WTQExecutor, + TEST_PROGRAM, + { + "question": "some question", + "db_id": "my_db_id", + "db_path": "path/to/my/db", + "original_answer": 5, + }, + ), +] diff --git a/tests/test_executors.py b/tests/test_executors.py index cbe68366..814fcafe 100644 --- a/tests/test_executors.py +++ b/tests/test_executors.py @@ -6,9 +6,29 @@ sys.path.append(ROOT_DIR) from execution.executors import MathExecutor +from tests.consts import TEST_EXECUTORS class TestExecutors(unittest.TestCase): + def test_executors(self): + for executor_cls, test_program, test_example in TEST_EXECUTORS: + print(f"\n======== testing {executor_cls.__name__} ========") + executor = executor_cls() + + print(test_program) + print(test_example) + + try: + exec_match, exec_results = executor.exec_program( + test_program, test_example + ) + self.assertIsInstance(exec_match, int) + print(exec_results) + # TODO: use real DB connections + except: + self.assertIsInstance(exec_match, int) + print(exec_results) + def test_math_executor(self): executor = MathExecutor() From db8461b7e3a9afcae5333b2bec43218af385a30a Mon Sep 17 00:00:00 2001 From: troyfeng116 Date: Mon, 1 May 2023 13:29:17 -0400 Subject: [PATCH 12/12] add more models for pipeline tests --- tests/consts.py | 26 +++++++++++++++++++ tests/test_executors.py | 2 ++ tests/test_inference_pipeline.py | 44 +++++++++++++++++--------------- 3 files changed, 52 insertions(+), 20 deletions(-) diff --git a/tests/consts.py b/tests/consts.py index 788d889e..93c20d4c 100644 --- a/tests/consts.py +++ b/tests/consts.py @@ -183,3 +183,29 @@ def __init__( }, ), ] + + +# ======== integration ======== + +TEST_PIPELINE_YAML_CONFIG_FILE = "tests/test_configs/few_shot-pipeline.yaml" + +# TODO: more datasets (see SummerTime matrix) +# each tuple contains model_name, Pytorch Lightning config YAML file, val_file_path +TEST_PIPELINE_INFO: List[Tuple[str, str, str]] = [ + ( + "EleutherAI/gpt-neo-125M", + TEST_PIPELINE_YAML_CONFIG_FILE, + "$NLP4CODE_TEST_DATA_PATH/gsmath/split_dev.jsonl", + ), + # TODO: tensor dimension mismatch error for codet5-small (probably config file problem) + # ( + # "Salesforce/codet5-small", + # TEST_PIPELINE_YAML_CONFIG_FILE, + # "$NLP4CODE_TEST_DATA_PATH/gsmath/split_dev.jsonl", + # ), + ( + "Salesforce/codegen-350M-multi", + TEST_PIPELINE_YAML_CONFIG_FILE, + "$NLP4CODE_TEST_DATA_PATH/gsmath/split_dev.jsonl", + ), +] diff --git a/tests/test_executors.py b/tests/test_executors.py index 814fcafe..a3f78bf8 100644 --- a/tests/test_executors.py +++ b/tests/test_executors.py @@ -29,6 +29,8 @@ def test_executors(self): self.assertIsInstance(exec_match, int) print(exec_results) + # custom tests for specific executors + def test_math_executor(self): executor = MathExecutor() diff --git a/tests/test_inference_pipeline.py b/tests/test_inference_pipeline.py index 247c4a8b..102f2e55 100644 --- a/tests/test_inference_pipeline.py +++ b/tests/test_inference_pipeline.py @@ -2,27 +2,31 @@ import unittest # get the data directory from the environment variable -DATA_DIR = os.environ.get('DATA_DIR') +DATA_DIR = os.environ.get("DATA_DIR") + +from tests.consts import TEST_PIPELINE_INFO + class TestDecOnlyModelInference(unittest.TestCase): def test_basic(self): - exit_code = os.system("export PYTHONPATH=`pwd`; echo $PYTHONPATH; echo $NLP4CODE_TEST_DATA_PATH; " + \ - "python finetuning/trainer.py validate " + \ - "--config finetuning/training_configs/few_shot/gsmath.yaml " + \ - # still using CPU for now - "--trainer.gpus 0 " + \ - "--trainer.accelerator cpu " + \ - "--trainer.precision 32 " + \ - "--model.init_args.print_eval_every_n_batches 1 " + \ - "--model.init_args.transformer_model_name EleutherAI/gpt-neo-125M " + \ - "--data.init_args.transformer_model_name EleutherAI/gpt-neo-125M " + \ - "--data.init_args.val_max_instances 2 " + \ - # "--data.init_args.val_batch_size 1 ") - "--data.init_args.val_batch_size 1 " + \ - "--data.init_args.val_file_path $NLP4CODE_TEST_DATA_PATH/gsmath/split_dev.jsonl " - ) - - self.assertEqual(exit_code, 0) + for model_name, yaml_config_path, val_file_path in TEST_PIPELINE_INFO: + exit_code = os.system( + "export PYTHONPATH=`pwd`; echo $PYTHONPATH; echo $NLP4CODE_TEST_DATA_PATH; " + + "python finetuning/trainer.py validate " + + f"--config {yaml_config_path} " + + "--trainer.gpus 0 " # still using CPU for now + + "--trainer.accelerator cpu " + + "--trainer.precision 32 " + + "--model.init_args.print_eval_every_n_batches 1 " + + f"--model.init_args.transformer_model_name {model_name} " + + f"--data.init_args.transformer_model_name {model_name} " + + "--data.init_args.val_max_instances 2 " + + "--data.init_args.val_batch_size 1 " # "--data.init_args.val_batch_size 1 ") + + f"--data.init_args.val_file_path {val_file_path} " + ) + + self.assertEqual(exit_code, 0) + -if __name__ == '__main__': - unittest.main() \ No newline at end of file +if __name__ == "__main__": + unittest.main()