From 8983f6af4f262cc2c7ef5846132bb49d9a5aba53 Mon Sep 17 00:00:00 2001 From: Smarth Gupta Date: Thu, 7 May 2026 08:52:24 +0000 Subject: [PATCH 1/5] Updating directory structure and process script --- .../private_school/manifest.json | 31 +++++++++ .../private_school_stats/manifest.json | 31 +++++++++ .../demographics/private_school/process.py | 64 ++++++++++++++----- 3 files changed, 111 insertions(+), 15 deletions(-) create mode 100644 scripts/us_nces/demographics/private_school/private_school/manifest.json create mode 100644 scripts/us_nces/demographics/private_school/private_school_stats/manifest.json diff --git a/scripts/us_nces/demographics/private_school/private_school/manifest.json b/scripts/us_nces/demographics/private_school/private_school/manifest.json new file mode 100644 index 0000000000..79f218d650 --- /dev/null +++ b/scripts/us_nces/demographics/private_school/private_school/manifest.json @@ -0,0 +1,31 @@ +{ + "import_specifications": [ + { + "import_name": "NCES_PrivateSchool", + "curator_emails": [ + "support@datacommons.org" + ], + "provenance_url": "https://nces.ed.gov/ccd/elsi/tableGenerator.aspx", + "provenance_description": "US nces school data for private", + "scripts": [ + "../run.sh", + "../process.py --place" + ], + "source_files": [ + "../gcs_folder/input_files/*/*.csv" + ], + "import_inputs": [ + { + "template_mcf": "../gcs_folder/output_place/us_nces_demographics_private_place.tmcf", + "cleaned_csv": "../gcs_folder/output_place/us_nces_demographics_private_place.csv" + } + ], + "cron_schedule": "30 3 1 3,6,9,12 *", + "resource_limits": { + "cpu": 32, + "memory": 512, + "disk": 300 + } + } + ] +} diff --git a/scripts/us_nces/demographics/private_school/private_school_stats/manifest.json b/scripts/us_nces/demographics/private_school/private_school_stats/manifest.json new file mode 100644 index 0000000000..78caf806b6 --- /dev/null +++ b/scripts/us_nces/demographics/private_school/private_school_stats/manifest.json @@ -0,0 +1,31 @@ +{ + 
"import_specifications": [ + { + "import_name": "NCES_PrivateSchoolStats", + "curator_emails": [ + "support@datacommons.org" + ], + "provenance_url": "https://nces.ed.gov/ccd/elsi/tableGenerator.aspx", + "provenance_description": "US nces school data for private", + "scripts": [ + "../run.sh", + "../process.py --stats" + ], + "source_files": [ + "../gcs_folder/input_files/*/*.csv" + ], + "import_inputs": [ + { + "template_mcf": "../gcs_folder/output_files/us_nces_demographics_private_school.tmcf", + "cleaned_csv": "../gcs_folder/output_files/us_nces_demographics_private_school.csv" + } + ], + "cron_schedule": "30 3 1 3,6,9,12 *", + "resource_limits": { + "cpu": 32, + "memory": 512, + "disk": 300 + } + } + ] +} diff --git a/scripts/us_nces/demographics/private_school/process.py b/scripts/us_nces/demographics/private_school/process.py index 5891e4e653..50eba113d9 100644 --- a/scripts/us_nces/demographics/private_school/process.py +++ b/scripts/us_nces/demographics/private_school/process.py @@ -4,7 +4,7 @@ # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -22,25 +22,28 @@ """ import os -import shutil import sys -from absl import flags -from absl import app -from absl import logging import warnings +from absl import app, flags, logging warnings.simplefilter(action='ignore', category=FutureWarning) warnings.simplefilter(action='ignore', category=DeprecationWarning) -warnings.simplefilter(action='ignore', category=FutureWarning) + MODULE_DIR = os.path.dirname(__file__) sys.path.insert(1, MODULE_DIR + '/../..') + from common.us_education import USEducation from config import * +# Define Flags +FLAGS = flags.FLAGS +flags.DEFINE_bool('stats', False, 'Generate Stats data (CSV, MCF, TMCF).') +flags.DEFINE_bool('place', False, 'Generate Place data (CSV, TMCF).') + class NCESPrivateSchool(USEducation): """ - This Class has requried methods to generate Cleaned CSV, + This Class has required methods to generate Cleaned CSV, MCF and TMCF Files. """ _import_name = SCHOOL_TYPE @@ -65,14 +68,23 @@ def set_generate_statvars_flag(self, flag: bool): self._generate_statvars = flag -if __name__ == '__main__': +def main(argv): + # 1. Flag Validation: Throw error if no flags are used + if not FLAGS.stats and not FLAGS.place: + raise app.UsageError( + "No execution flag provided. You must specify either --stats or --place." 
+ ) + try: logging.set_verbosity(logging.INFO) - logging.info("Main Method Starts For Private School District ") + logging.info("Main Method Starts For Private School District") + + # Path Setup gcs_output_dir_local = os.path.join( os.path.dirname(os.path.abspath(__file__)), "gcs_folder") input_path_base = os.path.join(gcs_output_dir_local, "input_files") os.makedirs(input_path_base, exist_ok=True) + input_files_to_process = [] if os.path.exists(input_path_base): for year_folder_name in sorted(os.listdir(input_path_base)): @@ -86,8 +98,10 @@ def set_generate_statvars_flag(self, flag: bool): if not input_files_to_process: logging.warning( - f"No CSV files found in {input_path_base} or its year subfolders. Please ensure download_input_files.py has been run and placed files correctly." + f"No CSV files found in {input_path_base}. Ensure download_input_files.py was run." ) + + # Output Directories output_file_path = os.path.join(gcs_output_dir_local, "output_files") os.makedirs(output_file_path, exist_ok=True) @@ -95,6 +109,7 @@ def set_generate_statvars_flag(self, flag: bool): "output_place") os.makedirs(output_file_path_place, exist_ok=True) + # File Paths cleaned_csv_path = os.path.join(output_file_path, CSV_FILE_NAME) mcf_path = os.path.join(output_file_path, MCF_FILE_NAME) tmcf_path = os.path.join(output_file_path, TMCF_FILE_NAME) @@ -103,13 +118,32 @@ def set_generate_statvars_flag(self, flag: bool): CSV_DUPLICATE_NAME) tmcf_path_place = os.path.join(output_file_path_place, TMCF_FILE_PLACE) + # Initialize Loader loader = NCESPrivateSchool(input_files_to_process, cleaned_csv_path, mcf_path, tmcf_path, cleaned_csv_place, duplicate_csv_place, tmcf_path_place) - loader.generate_csv() - loader.generate_mcf() - loader.generate_tmcf() - logging.info("Main Method Completed For Private School District ") + # 2. 
Conditional Execution based on flags + if FLAGS.stats: + logging.info("Triggering Stats Import...") + loader.set_generate_statvars_flag(True) + loader.generate_csv() + loader.generate_mcf() + loader.generate_tmcf() + + if FLAGS.place: + logging.info("Triggering Place Import...") + # Disable statvars so processing focuses on entity/place data + loader.set_generate_statvars_flag(False) + loader.generate_csv() + loader.generate_tmcf() + + logging.info("Main Method Completed For Private School District") + except Exception as e: - logging.fatal(f"Error While Running Private School Process: {e} ") + logging.fatal(f"Error While Running Private School Process: {e}") + + +if __name__ == '__main__': + app.run(main) + \ No newline at end of file From 9901a1de9db9fb458c1ee70f4c780077732114cc Mon Sep 17 00:00:00 2001 From: Smarth Gupta Date: Mon, 11 May 2026 03:22:03 +0000 Subject: [PATCH 2/5] updating process script --- .../demographics/private_school/manifest.json | 35 ---------- .../manifest.json | 7 +- .../private_school_stats/manifest.json | 9 +-- .../demographics/private_school/process.py | 64 +++++-------------- 4 files changed, 18 insertions(+), 97 deletions(-) delete mode 100644 scripts/us_nces/demographics/private_school/manifest.json rename scripts/us_nces/demographics/private_school/{private_school => private_school_place}/manifest.json (82%) diff --git a/scripts/us_nces/demographics/private_school/manifest.json b/scripts/us_nces/demographics/private_school/manifest.json deleted file mode 100644 index 195b29fb2e..0000000000 --- a/scripts/us_nces/demographics/private_school/manifest.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "import_specifications": [ - { - "import_name": "NCES_PrivateSchool", - "curator_emails": [ - "support@datacommons.org" - ], - "provenance_url": "https://nces.ed.gov/ccd/elsi/tableGenerator.aspx", - "provenance_description": "US nces school data for private", - "scripts": [ - "run.sh", - "process.py" - ], - "source_files": [ - 
"gcs_folder/input_files/*/*.csv" - ], - "import_inputs": [ - { - "template_mcf": "gcs_folder/output_files/us_nces_demographics_private_school.tmcf", - "cleaned_csv": "gcs_folder/output_files/us_nces_demographics_private_school.csv" - }, - { - "template_mcf": "gcs_folder/output_place/us_nces_demographics_private_place.tmcf", - "cleaned_csv": "gcs_folder/output_place/us_nces_demographics_private_place.csv" - } - ], - "cron_schedule": "30 3 1 3,6,9,12 *", - "resource_limits": { - "cpu": 32, - "memory": 512, - "disk": 300 - } - } - ] -} diff --git a/scripts/us_nces/demographics/private_school/private_school/manifest.json b/scripts/us_nces/demographics/private_school/private_school_place/manifest.json similarity index 82% rename from scripts/us_nces/demographics/private_school/private_school/manifest.json rename to scripts/us_nces/demographics/private_school/private_school_place/manifest.json index 79f218d650..ab23fb30df 100644 --- a/scripts/us_nces/demographics/private_school/private_school/manifest.json +++ b/scripts/us_nces/demographics/private_school/private_school_place/manifest.json @@ -20,12 +20,7 @@ "cleaned_csv": "../gcs_folder/output_place/us_nces_demographics_private_place.csv" } ], - "cron_schedule": "30 3 1 3,6,9,12 *", - "resource_limits": { - "cpu": 32, - "memory": 512, - "disk": 300 - } + "cron_schedule": "30 3 1 3,6,9,12 *" } ] } diff --git a/scripts/us_nces/demographics/private_school/private_school_stats/manifest.json b/scripts/us_nces/demographics/private_school/private_school_stats/manifest.json index 78caf806b6..dfad982454 100644 --- a/scripts/us_nces/demographics/private_school/private_school_stats/manifest.json +++ b/scripts/us_nces/demographics/private_school/private_school_stats/manifest.json @@ -3,7 +3,7 @@ { "import_name": "NCES_PrivateSchoolStats", "curator_emails": [ - "support@datacommons.org" + "" ], "provenance_url": "https://nces.ed.gov/ccd/elsi/tableGenerator.aspx", "provenance_description": "US nces school data for private", @@ -20,12 
+20,7 @@ "cleaned_csv": "../gcs_folder/output_files/us_nces_demographics_private_school.csv" } ], - "cron_schedule": "30 3 1 3,6,9,12 *", - "resource_limits": { - "cpu": 32, - "memory": 512, - "disk": 300 - } + "cron_schedule": "30 3 1 3,6,9,12 *" } ] } diff --git a/scripts/us_nces/demographics/private_school/process.py b/scripts/us_nces/demographics/private_school/process.py index 50eba113d9..5891e4e653 100644 --- a/scripts/us_nces/demographics/private_school/process.py +++ b/scripts/us_nces/demographics/private_school/process.py @@ -4,7 +4,7 @@ # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -22,28 +22,25 @@ """ import os +import shutil import sys +from absl import flags +from absl import app +from absl import logging import warnings -from absl import app, flags, logging warnings.simplefilter(action='ignore', category=FutureWarning) warnings.simplefilter(action='ignore', category=DeprecationWarning) - +warnings.simplefilter(action='ignore', category=FutureWarning) MODULE_DIR = os.path.dirname(__file__) sys.path.insert(1, MODULE_DIR + '/../..') - from common.us_education import USEducation from config import * -# Define Flags -FLAGS = flags.FLAGS -flags.DEFINE_bool('stats', False, 'Generate Stats data (CSV, MCF, TMCF).') -flags.DEFINE_bool('place', False, 'Generate Place data (CSV, TMCF).') - class NCESPrivateSchool(USEducation): """ - This Class has required methods to generate Cleaned CSV, + This Class has requried methods to generate Cleaned CSV, MCF and TMCF Files. """ _import_name = SCHOOL_TYPE @@ -68,23 +65,14 @@ def set_generate_statvars_flag(self, flag: bool): self._generate_statvars = flag -def main(argv): - # 1. 
Flag Validation: Throw error if no flags are used - if not FLAGS.stats and not FLAGS.place: - raise app.UsageError( - "No execution flag provided. You must specify either --stats or --place." - ) - +if __name__ == '__main__': try: logging.set_verbosity(logging.INFO) - logging.info("Main Method Starts For Private School District") - - # Path Setup + logging.info("Main Method Starts For Private School District ") gcs_output_dir_local = os.path.join( os.path.dirname(os.path.abspath(__file__)), "gcs_folder") input_path_base = os.path.join(gcs_output_dir_local, "input_files") os.makedirs(input_path_base, exist_ok=True) - input_files_to_process = [] if os.path.exists(input_path_base): for year_folder_name in sorted(os.listdir(input_path_base)): @@ -98,10 +86,8 @@ def main(argv): if not input_files_to_process: logging.warning( - f"No CSV files found in {input_path_base}. Ensure download_input_files.py was run." + f"No CSV files found in {input_path_base} or its year subfolders. Please ensure download_input_files.py has been run and placed files correctly." ) - - # Output Directories output_file_path = os.path.join(gcs_output_dir_local, "output_files") os.makedirs(output_file_path, exist_ok=True) @@ -109,7 +95,6 @@ def main(argv): "output_place") os.makedirs(output_file_path_place, exist_ok=True) - # File Paths cleaned_csv_path = os.path.join(output_file_path, CSV_FILE_NAME) mcf_path = os.path.join(output_file_path, MCF_FILE_NAME) tmcf_path = os.path.join(output_file_path, TMCF_FILE_NAME) @@ -118,32 +103,13 @@ def main(argv): CSV_DUPLICATE_NAME) tmcf_path_place = os.path.join(output_file_path_place, TMCF_FILE_PLACE) - # Initialize Loader loader = NCESPrivateSchool(input_files_to_process, cleaned_csv_path, mcf_path, tmcf_path, cleaned_csv_place, duplicate_csv_place, tmcf_path_place) - # 2. 
Conditional Execution based on flags - if FLAGS.stats: - logging.info("Triggering Stats Import...") - loader.set_generate_statvars_flag(True) - loader.generate_csv() - loader.generate_mcf() - loader.generate_tmcf() - - if FLAGS.place: - logging.info("Triggering Place Import...") - # Disable statvars so processing focuses on entity/place data - loader.set_generate_statvars_flag(False) - loader.generate_csv() - loader.generate_tmcf() - - logging.info("Main Method Completed For Private School District") - + loader.generate_csv() + loader.generate_mcf() + loader.generate_tmcf() + logging.info("Main Method Completed For Private School District ") except Exception as e: - logging.fatal(f"Error While Running Private School Process: {e}") - - -if __name__ == '__main__': - app.run(main) - \ No newline at end of file + logging.fatal(f"Error While Running Private School Process: {e} ") From 0cc1b5150998628e22901cb8e9e8e6bd18c64c71 Mon Sep 17 00:00:00 2001 From: Smarth Gupta Date: Mon, 11 May 2026 06:13:16 +0000 Subject: [PATCH 3/5] changes in process script --- .../demographics/private_school/process.py | 62 ++++++++++++++----- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/scripts/us_nces/demographics/private_school/process.py b/scripts/us_nces/demographics/private_school/process.py index 5891e4e653..fa58c8d500 100644 --- a/scripts/us_nces/demographics/private_school/process.py +++ b/scripts/us_nces/demographics/private_school/process.py @@ -4,7 +4,7 @@ # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -22,25 +22,27 @@ """ import os -import shutil import sys -from absl import flags -from absl import app -from absl import logging import warnings +from absl import app, flags, logging warnings.simplefilter(action='ignore', category=FutureWarning) warnings.simplefilter(action='ignore', category=DeprecationWarning) -warnings.simplefilter(action='ignore', category=FutureWarning) + MODULE_DIR = os.path.dirname(__file__) sys.path.insert(1, MODULE_DIR + '/../..') from common.us_education import USEducation from config import * +# Define Flags +FLAGS = flags.FLAGS +flags.DEFINE_bool('stats', False, 'Generate Stats data (CSV, MCF, TMCF).') +flags.DEFINE_bool('place', False, 'Generate Place data (CSV, TMCF).') + class NCESPrivateSchool(USEducation): """ - This Class has requried methods to generate Cleaned CSV, + This Class has required methods to generate Cleaned CSV, MCF and TMCF Files. """ _import_name = SCHOOL_TYPE @@ -65,14 +67,23 @@ def set_generate_statvars_flag(self, flag: bool): self._generate_statvars = flag -if __name__ == '__main__': +def main(argv): + # Flag Validation: Throw error if no flags are used + if not FLAGS.stats and not FLAGS.place: + raise app.UsageError( + "No execution flag provided. You must specify either --stats or --place." 
+ ) + try: logging.set_verbosity(logging.INFO) - logging.info("Main Method Starts For Private School District ") + logging.info("Main Method Starts For Private School District") + + # Path Setup gcs_output_dir_local = os.path.join( os.path.dirname(os.path.abspath(__file__)), "gcs_folder") input_path_base = os.path.join(gcs_output_dir_local, "input_files") os.makedirs(input_path_base, exist_ok=True) + input_files_to_process = [] if os.path.exists(input_path_base): for year_folder_name in sorted(os.listdir(input_path_base)): @@ -86,8 +97,10 @@ def set_generate_statvars_flag(self, flag: bool): if not input_files_to_process: logging.warning( - f"No CSV files found in {input_path_base} or its year subfolders. Please ensure download_input_files.py has been run and placed files correctly." + f"No CSV files found in {input_path_base}. Ensure download_input_files.py was run." ) + + # Output Directories output_file_path = os.path.join(gcs_output_dir_local, "output_files") os.makedirs(output_file_path, exist_ok=True) @@ -95,6 +108,7 @@ def set_generate_statvars_flag(self, flag: bool): "output_place") os.makedirs(output_file_path_place, exist_ok=True) + # File Paths cleaned_csv_path = os.path.join(output_file_path, CSV_FILE_NAME) mcf_path = os.path.join(output_file_path, MCF_FILE_NAME) tmcf_path = os.path.join(output_file_path, TMCF_FILE_NAME) @@ -103,13 +117,31 @@ def set_generate_statvars_flag(self, flag: bool): CSV_DUPLICATE_NAME) tmcf_path_place = os.path.join(output_file_path_place, TMCF_FILE_PLACE) + # Initialize Loader loader = NCESPrivateSchool(input_files_to_process, cleaned_csv_path, mcf_path, tmcf_path, cleaned_csv_place, duplicate_csv_place, tmcf_path_place) - loader.generate_csv() - loader.generate_mcf() - loader.generate_tmcf() - logging.info("Main Method Completed For Private School District ") + # Conditional Execution based on flags + if FLAGS.stats: + logging.info("Triggering Stats Import...") + loader.set_generate_statvars_flag(True) + loader.generate_csv() 
+ loader.generate_mcf() + loader.generate_tmcf() + + if FLAGS.place: + logging.info("Triggering Place Import...") + # Disable statvars so processing focuses on entity/place data + loader.set_generate_statvars_flag(False) + loader.generate_csv() + loader.generate_tmcf() + + logging.info("Main Method Completed For Private School District") + except Exception as e: - logging.fatal(f"Error While Running Private School Process: {e} ") + logging.fatal(f"Error While Running Private School Process: {e}") + + +if __name__ == '__main__': + app.run(main) From 8b2ca0e39d85ec2718a121e25807ff0d69f07683 Mon Sep 17 00:00:00 2001 From: Smarth Gupta Date: Mon, 11 May 2026 07:16:17 +0000 Subject: [PATCH 4/5] updating process script --- scripts/us_nces/demographics/private_school/process.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/us_nces/demographics/private_school/process.py b/scripts/us_nces/demographics/private_school/process.py index fa58c8d500..bec382656c 100644 --- a/scripts/us_nces/demographics/private_school/process.py +++ b/scripts/us_nces/demographics/private_school/process.py @@ -140,7 +140,7 @@ def main(argv): logging.info("Main Method Completed For Private School District") except Exception as e: - logging.fatal(f"Error While Running Private School Process: {e}") + logging.fatal(f"Error While Running Private School Process: {e}", exc_info=True) if __name__ == '__main__': From f639ef933e3b44909f637838b5db140eb750750b Mon Sep 17 00:00:00 2001 From: Smarth Gupta Date: Wed, 13 May 2026 18:12:04 +0000 Subject: [PATCH 5/5] updating files --- .../{ => NCES_PrivateSchoolStats}/manifest.json | 16 ++++++++++------ .../us_nces/demographics/private_school/run.sh | 16 ++++++++++++++-- 2 files changed, 24 insertions(+), 8 deletions(-) rename scripts/us_nces/demographics/private_school/private_school_stats/{ => NCES_PrivateSchoolStats}/manifest.json (50%) diff --git a/scripts/us_nces/demographics/private_school/private_school_stats/manifest.json 
b/scripts/us_nces/demographics/private_school/private_school_stats/NCES_PrivateSchoolStats/manifest.json similarity index 50% rename from scripts/us_nces/demographics/private_school/private_school_stats/manifest.json rename to scripts/us_nces/demographics/private_school/private_school_stats/NCES_PrivateSchoolStats/manifest.json index dfad982454..5fb2dc29b6 100644 --- a/scripts/us_nces/demographics/private_school/private_school_stats/manifest.json +++ b/scripts/us_nces/demographics/private_school/private_school_stats/NCES_PrivateSchoolStats/manifest.json @@ -8,19 +8,23 @@ "provenance_url": "https://nces.ed.gov/ccd/elsi/tableGenerator.aspx", "provenance_description": "US nces school data for private", "scripts": [ - "../run.sh", - "../process.py --stats" + "../../run.sh" ], "source_files": [ - "../gcs_folder/input_files/*/*.csv" + "../../gcs_folder/input_files/*/*.csv" ], "import_inputs": [ { - "template_mcf": "../gcs_folder/output_files/us_nces_demographics_private_school.tmcf", - "cleaned_csv": "../gcs_folder/output_files/us_nces_demographics_private_school.csv" + "template_mcf": "../../gcs_folder/output_files/us_nces_demographics_private_school.tmcf", + "cleaned_csv": "../../gcs_folder/output_files/us_nces_demographics_private_school.csv" } ], - "cron_schedule": "30 3 1 3,6,9,12 *" + "cron_schedule": "30 3 1 3,6,9,12 *", + "resource_limits": { + "cpu": 32, + "memory": 512, + "disk": 300 + } } ] } diff --git a/scripts/us_nces/demographics/private_school/run.sh b/scripts/us_nces/demographics/private_school/run.sh index f4dfb017f2..f2a45a91e3 100644 --- a/scripts/us_nces/demographics/private_school/run.sh +++ b/scripts/us_nces/demographics/private_school/run.sh @@ -1,2 +1,14 @@ -mkdir -p gcs_folder/input_files -gcloud storage cp --recursive gs://unresolved_mcf/us_nces/demographics/private_school/semi_automation_input_files/* gcs_folder/input_files/ +#!/bin/bash + +# 1. 
Get the absolute path to the 'private_school' directory +BASE_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) + +# 2. Create the actual data storage folder in the base directory +mkdir -p "$BASE_DIR/gcs_folder/input_files" + +# 3. Download files +gcloud storage cp --recursive "gs://unresolved_mcf/us_nces/demographics/private_school/semi_automation_input_files/*" "$BASE_DIR/gcs_folder/input_files/" + +# 4. Run the process +cd "$BASE_DIR" +python process.py --stats