From 422c47aaf1c9cb50a4afb04f25eb7c0e590a8d1b Mon Sep 17 00:00:00 2001 From: Masataro Asai Date: Thu, 26 Dec 2024 12:54:54 -0500 Subject: [PATCH 1/3] updated the docstrings of template generator/validator --- templates/generator/template.py | 2 +- templates/validator/template.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/generator/template.py b/templates/generator/template.py index f83e3948..e0c712ce 100644 --- a/templates/generator/template.py +++ b/templates/generator/template.py @@ -12,7 +12,7 @@ @register_block("template_generator") class TemplateGenerator(BaseGeneratorBlock): - """Base Class for all Generators""" + """TODO: Copy and edit this template to implement your own generator class""" def __init__(self, name: str, config: Dict, **kwargs: Any) -> None: super().__init__(name, config, **kwargs) diff --git a/templates/validator/template.py b/templates/validator/template.py index 92da5e6a..485c403d 100644 --- a/templates/validator/template.py +++ b/templates/validator/template.py @@ -12,7 +12,7 @@ @register_block("template_validator") class TemplateValidator(BaseValidatorBlock): - """Base Class for all Validators""" + """TODO: Copy and edit this template to implement your own validator class""" def __init__(self, name: str, config: Dict) -> None: super().__init__(name, config) From 3bfe3f52ea9ac228bb4e0df26fc5a1d463aff46d Mon Sep 17 00:00:00 2001 From: Masataro Asai Date: Thu, 26 Dec 2024 13:46:57 -0500 Subject: [PATCH 2/3] updated the docstring for DefaultDatastore --- fms_dgt/datastores/default.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/fms_dgt/datastores/default.py b/fms_dgt/datastores/default.py index 512bcfee..65dbcd10 100644 --- a/fms_dgt/datastores/default.py +++ b/fms_dgt/datastores/default.py @@ -20,7 +20,22 @@ @register_datastore("default") class DefaultDatastore(BaseDatastore): - """Base Class for all data stores""" + """ + The default datastore is a data store that 
takes the name of a `.jsonl`, + `.json`, `.yaml`, or `.parquet` file, loads it, and returns an iterator + over it. + + If none of the filename extensions match, it attempts to give the first + argument to huggingface's `datasets.load_dataset` function to load the data, and returns an iterator over it. + + It can also dump the data into a file. + + @param output_dir : The destination directory for writing the data. + @param data_format : The data format for writing the data. It is not used for loading; the format is guessed from data_path. + @param data : A list. Data loaded from the files are concatenated with this list. + @param data_path : The path to the file to load. If it is a list, it is interpreted as the arguments to huggingface's `datasets.load_dataset`. + + """ def __init__( self, From 4b8ec46699cd7bab7b1618cd73ff2ba81e4f964e Mon Sep 17 00:00:00 2001 From: Masataro Asai Date: Thu, 26 Dec 2024 13:53:39 -0500 Subject: [PATCH 3/3] updated the docstring for DefaultDataloader --- fms_dgt/dataloaders/default.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fms_dgt/dataloaders/default.py b/fms_dgt/dataloaders/default.py index eae5744a..17147144 100644 --- a/fms_dgt/dataloaders/default.py +++ b/fms_dgt/dataloaders/default.py @@ -10,7 +10,13 @@ @register_dataloader("default") class DefaultDataloader(BaseDataloader): - """Base Class for all dataloaders""" + """ + The default dataloader takes a datastore and iterates over it. + + @param state_datastore : A data store. + @param loop_over_data : If true, it wraps around when it reaches the end of the iterator. + + """ def __init__( self,